5881 corrected maxall vs. maxalloc in comments
--- old/usr/src/uts/common/os/taskq.c
+++ new/usr/src/uts/common/os/taskq.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 27 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
28 28 */
29 29
30 30 /*
31 31 * Kernel task queues: general-purpose asynchronous task scheduling.
32 32 *
33 33 * A common problem in kernel programming is the need to schedule tasks
34 34 * to be performed later, by another thread. There are several reasons
35 35 * you may want or need to do this:
36 36 *
37 37 * (1) The task isn't time-critical, but your current code path is.
38 38 *
39 39 * (2) The task may require grabbing locks that you already hold.
40 40 *
41 41 * (3) The task may need to block (e.g. to wait for memory), but you
42 42 * cannot block in your current context.
43 43 *
44 44 * (4) Your code path can't complete because of some condition, but you can't
 45 45 * sleep or fail, so you queue the task for later execution when the
 46 46 * condition disappears.
47 47 *
48 48 * (5) You just want a simple way to launch multiple tasks in parallel.
49 49 *
50 50 * Task queues provide such a facility. In its simplest form (used when
51 51 * performance is not a critical consideration) a task queue consists of a
52 52 * single list of tasks, together with one or more threads to service the
53 53 * list. There are some cases when this simple queue is not sufficient:
54 54 *
55 55 * (1) The task queues are very hot and there is a need to avoid data and lock
56 56 * contention over global resources.
57 57 *
58 58 * (2) Some tasks may depend on other tasks to complete, so they can't be put in
59 59 * the same list managed by the same thread.
60 60 *
61 61 * (3) Some tasks may block for a long time, and this should not block other
62 62 * tasks in the queue.
63 63 *
64 64 * To provide useful service in such cases we define a "dynamic task queue"
65 65 * which has an individual thread for each of the tasks. These threads are
66 66 * dynamically created as they are needed and destroyed when they are not in
67 67 * use. The API for managing task pools is the same as for managing task queues
 68 68 * with the exception of the taskq creation flag TASKQ_DYNAMIC, which indicates
 69 69 * that dynamic task pool behavior is desired.
70 70 *
71 71 * Dynamic task queues may also place tasks in the normal queue (called "backing
 72 72 * queue") when the task pool runs out of resources. Users of task queues may
73 73 * disallow such queued scheduling by specifying TQ_NOQUEUE in the dispatch
74 74 * flags.
75 75 *
76 76 * The backing task queue is also used for scheduling internal tasks needed for
77 77 * dynamic task queue maintenance.
78 78 *
79 79 * INTERFACES ==================================================================
80 80 *
81 - * taskq_t *taskq_create(name, nthreads, pri, minalloc, maxall, flags);
81 + * taskq_t *taskq_create(name, nthreads, pri, minalloc, maxalloc, flags);
82 82 *
83 83 * Create a taskq with specified properties.
84 84 * Possible 'flags':
85 85 *
86 86 * TASKQ_DYNAMIC: Create task pool for task management. If this flag is
87 87 * specified, 'nthreads' specifies the maximum number of threads in
88 88 * the task queue. Task execution order for dynamic task queues is
89 89 * not predictable.
90 90 *
91 91 * If this flag is not specified (default case) a
92 92 * single-list task queue is created with 'nthreads' threads
93 93 * servicing it. Entries in this queue are managed by
94 94 * taskq_ent_alloc() and taskq_ent_free() which try to keep the
95 95 * task population between 'minalloc' and 'maxalloc', but the
96 96 * latter limit is only advisory for TQ_SLEEP dispatches and the
97 97 * former limit is only advisory for TQ_NOALLOC dispatches. If
98 98 * TASKQ_PREPOPULATE is set in 'flags', the taskq will be
99 99 * prepopulated with 'minalloc' task structures.
100 100 *
101 101 * Since non-DYNAMIC taskqs are queues, tasks are guaranteed to be
102 102 * executed in the order they are scheduled if nthreads == 1.
103 103 * If nthreads > 1, task execution order is not predictable.
104 104 *
105 105 * TASKQ_PREPOPULATE: Prepopulate task queue with threads.
106 106 * Also prepopulate the task queue with 'minalloc' task structures.
107 107 *
108 108 * TASKQ_THREADS_CPU_PCT: This flag specifies that 'nthreads' should be
109 109 * interpreted as a percentage of the # of online CPUs on the
110 110 * system. The taskq subsystem will automatically adjust the
111 111 * number of threads in the taskq in response to CPU online
112 112 * and offline events, to keep the ratio. nthreads must be in
113 113 * the range [0,100].
114 114 *
115 115 * The calculation used is:
116 116 *
117 117 * MAX((ncpus_online * percentage)/100, 1)
118 118 *
119 119 * This flag is not supported for DYNAMIC task queues.
120 120 * This flag is not compatible with TASKQ_CPR_SAFE.
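 *		As an example of the calculation above (illustrative
 *		numbers only): a taskq created with nthreads = 25 on a
 *		system with 8 CPUs online gets MAX((8 * 25) / 100, 1) = 2
 *		threads; if the online CPU count later drops to 2, the
 *		target is recomputed as MAX((2 * 25) / 100, 1) = 1.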
121 121 *
122 122 * TASKQ_CPR_SAFE: This flag specifies that users of the task queue will
123 123 * use their own protocol for handling CPR issues. This flag is not
124 124 * supported for DYNAMIC task queues. This flag is not compatible
125 125 * with TASKQ_THREADS_CPU_PCT.
126 126 *
127 127 * The 'pri' field specifies the default priority for the threads that
128 128 * service all scheduled tasks.
129 129 *
130 130 * taskq_t *taskq_create_instance(name, instance, nthreads, pri, minalloc,
131 - * maxall, flags);
131 + * maxalloc, flags);
132 132 *
133 133 * Like taskq_create(), but takes an instance number (or -1 to indicate
134 134 * no instance).
135 135 *
136 - * taskq_t *taskq_create_proc(name, nthreads, pri, minalloc, maxall, proc,
136 + * taskq_t *taskq_create_proc(name, nthreads, pri, minalloc, maxalloc, proc,
137 137 * flags);
138 138 *
139 139 * Like taskq_create(), but creates the taskq threads in the specified
140 140 * system process. If proc != &p0, this must be called from a thread
141 141 * in that process.
142 142 *
143 - * taskq_t *taskq_create_sysdc(name, nthreads, minalloc, maxall, proc,
143 + * taskq_t *taskq_create_sysdc(name, nthreads, minalloc, maxalloc, proc,
144 144 * dc, flags);
145 145 *
146 146 * Like taskq_create_proc(), but the taskq threads will use the
147 147 * System Duty Cycle (SDC) scheduling class with a duty cycle of dc.
148 148 *
149 149 * void taskq_destroy(tap):
150 150 *
151 151 * Waits for any scheduled tasks to complete, then destroys the taskq.
152 152 * Caller should guarantee that no new tasks are scheduled in the closing
153 153 * taskq.
154 154 *
155 155 * taskqid_t taskq_dispatch(tq, func, arg, flags):
156 156 *
157 157 * Dispatches the task "func(arg)" to taskq. The 'flags' indicates whether
158 158 * the caller is willing to block for memory. The function returns an
159 159 * opaque value which is zero iff dispatch fails. If flags is TQ_NOSLEEP
160 160 * or TQ_NOALLOC and the task can't be dispatched, taskq_dispatch() fails
161 161 * and returns (taskqid_t)0.
162 162 *
163 163 * ASSUMES: func != NULL.
164 164 *
165 165 * Possible flags:
166 166 * TQ_NOSLEEP: Do not wait for resources; may fail.
167 167 *
168 168 * TQ_NOALLOC: Do not allocate memory; may fail. May only be used with
169 169 * non-dynamic task queues.
170 170 *
 171 171 * TQ_NOQUEUE: Do not enqueue the task if it can't be dispatched due to
 172 172 * lack of available resources; fail instead. If this flag is not
173 173 * set, and the task pool is exhausted, the task may be scheduled
174 174 * in the backing queue. This flag may ONLY be used with dynamic
175 175 * task queues.
176 176 *
177 177 * NOTE: This flag should always be used when a task queue is used
178 178 * for tasks that may depend on each other for completion.
179 179 * Enqueueing dependent tasks may create deadlocks.
180 180 *
181 181 * TQ_SLEEP: May block waiting for resources. May still fail for
 182 182 * dynamic task queues if TQ_NOQUEUE is also specified; otherwise it
 183 183 * always succeeds.
184 184 *
185 185 * TQ_FRONT: Puts the new task at the front of the queue. Be careful.
186 186 *
187 187 * NOTE: Dynamic task queues are much more likely to fail in
188 188 * taskq_dispatch() (especially if TQ_NOQUEUE was specified), so it
189 189 * is important to have backup strategies handling such failures.
190 190 *
191 191 * void taskq_dispatch_ent(tq, func, arg, flags, tqent)
192 192 *
193 193 * This is a light-weight form of taskq_dispatch(), that uses a
194 194 * preallocated taskq_ent_t structure for scheduling. As a
195 195 * result, it does not perform allocations and cannot ever fail.
196 196 * Note especially that it cannot be used with TASKQ_DYNAMIC
197 197 * taskqs. The memory for the tqent must not be modified or used
198 198 * until the function (func) is called. (However, func itself
199 199 * may safely modify or free this memory, once it is called.)
200 200 * Note that the taskq framework will NOT free this memory.
201 201 *
202 202 * void taskq_wait(tq):
203 203 *
204 204 * Waits for all previously scheduled tasks to complete.
205 205 *
206 206 * NOTE: It does not stop any new task dispatches.
207 207 * Do NOT call taskq_wait() from a task: it will cause deadlock.
208 208 *
209 209 * void taskq_suspend(tq)
210 210 *
211 211 * Suspend all task execution. Tasks already scheduled for a dynamic task
 212 212 * queue will still be executed, but all newly scheduled tasks will be
213 213 * suspended until taskq_resume() is called.
214 214 *
215 215 * int taskq_suspended(tq)
216 216 *
217 217 * Returns 1 if taskq is suspended and 0 otherwise. It is intended to
218 218 * ASSERT that the task queue is suspended.
219 219 *
220 220 * void taskq_resume(tq)
221 221 *
222 222 * Resume task queue execution.
223 223 *
224 224 * int taskq_member(tq, thread)
225 225 *
226 226 * Returns 1 if 'thread' belongs to taskq 'tq' and 0 otherwise. The
227 227 * intended use is to ASSERT that a given function is called in taskq
228 228 * context only.
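 *		For instance (my_taskq being a placeholder for whatever
 *		queue the caller created):
 *
 *			ASSERT(taskq_member(my_taskq, curthread));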
229 229 *
230 230 * system_taskq
231 231 *
232 232 * Global system-wide dynamic task queue for common uses. It may be used by
233 233 * any subsystem that needs to schedule tasks and does not need to manage
234 234 * its own task queues. It is initialized quite early during system boot.
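 *
 *	The sketch below shows how these interfaces are typically combined for
 *	a plain (non-dynamic) queue. It is illustrative only; my_taskq, my_work
 *	and my_arg are placeholder names, not anything defined in this file:
 *
 *		taskq_t *my_taskq = taskq_create("my_taskq", 1, minclsyspri,
 *		    1, INT_MAX, TASKQ_PREPOPULATE);
 *
 *		if (taskq_dispatch(my_taskq, my_work, my_arg, TQ_NOSLEEP) ==
 *		    (taskqid_t)0) {
 *			... dispatch failed, run my_work(my_arg) some other way
 *		}
 *
 *		taskq_wait(my_taskq);		... all dispatched tasks done
 *		taskq_destroy(my_taskq);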
235 235 *
236 236 * IMPLEMENTATION ==============================================================
237 237 *
238 238 * This is schematic representation of the task queue structures.
239 239 *
240 240 * taskq:
241 241 * +-------------+
242 242 * | tq_lock | +---< taskq_ent_free()
243 243 * +-------------+ |
244 244 * |... | | tqent: tqent:
245 245 * +-------------+ | +------------+ +------------+
246 246 * | tq_freelist |-->| tqent_next |--> ... ->| tqent_next |
247 247 * +-------------+ +------------+ +------------+
248 248 * |... | | ... | | ... |
249 249 * +-------------+ +------------+ +------------+
250 250 * | tq_task | |
251 251 * | | +-------------->taskq_ent_alloc()
252 252 * +--------------------------------------------------------------------------+
253 253 * | | | tqent tqent |
254 254 * | +---------------------+ +--> +------------+ +--> +------------+ |
255 255 * | | ... | | | func, arg | | | func, arg | |
256 256 * +>+---------------------+ <---|-+ +------------+ <---|-+ +------------+ |
257 257 * | tq_taskq.tqent_next | ----+ | | tqent_next | --->+ | | tqent_next |--+
258 258 * +---------------------+ | +------------+ ^ | +------------+
259 259 * +-| tq_task.tqent_prev | +--| tqent_prev | | +--| tqent_prev | ^
260 260 * | +---------------------+ +------------+ | +------------+ |
261 261 * | |... | | ... | | | ... | |
262 262 * | +---------------------+ +------------+ | +------------+ |
263 263 * | ^ | |
264 264 * | | | |
265 265 * +--------------------------------------+--------------+ TQ_APPEND() -+
266 266 * | | |
267 267 * |... | taskq_thread()-----+
268 268 * +-------------+
269 269 * | tq_buckets |--+-------> [ NULL ] (for regular task queues)
270 270 * +-------------+ |
271 271 * | DYNAMIC TASK QUEUES:
272 272 * |
273 273 * +-> taskq_bucket[nCPU] taskq_bucket_dispatch()
274 274 * +-------------------+ ^
275 275 * +--->| tqbucket_lock | |
276 276 * | +-------------------+ +--------+ +--------+
277 277 * | | tqbucket_freelist |-->| tqent |-->...| tqent | ^
278 278 * | +-------------------+<--+--------+<--...+--------+ |
279 279 * | | ... | | thread | | thread | |
280 280 * | +-------------------+ +--------+ +--------+ |
281 281 * | +-------------------+ |
282 282 * taskq_dispatch()--+--->| tqbucket_lock | TQ_APPEND()------+
283 283 * TQ_HASH() | +-------------------+ +--------+ +--------+
284 284 * | | tqbucket_freelist |-->| tqent |-->...| tqent |
285 285 * | +-------------------+<--+--------+<--...+--------+
286 286 * | | ... | | thread | | thread |
287 287 * | +-------------------+ +--------+ +--------+
288 288 * +---> ...
289 289 *
290 290 *
 291 291 * Task queues use the tq_task field to link new entries into the queue. The
 292 292 * queue is a circular doubly-linked list. Entries are put at the end of the list with
293 293 * TQ_APPEND() and processed from the front of the list by taskq_thread() in
294 294 * FIFO order. Task queue entries are cached in the free list managed by
295 295 * taskq_ent_alloc() and taskq_ent_free() functions.
296 296 *
 297 297 * All threads used by task queues set the t_taskq field of the thread to
298 298 * point to the task queue.
299 299 *
300 300 * Taskq Thread Management -----------------------------------------------------
301 301 *
302 302 * Taskq's non-dynamic threads are managed with several variables and flags:
303 303 *
304 304 * * tq_nthreads - The number of threads in taskq_thread() for the
305 305 * taskq.
306 306 *
307 307 * * tq_active - The number of threads not waiting on a CV in
308 308 * taskq_thread(); includes newly created threads
309 309 * not yet counted in tq_nthreads.
310 310 *
311 311 * * tq_nthreads_target
312 312 * - The number of threads desired for the taskq.
313 313 *
314 314 * * tq_flags & TASKQ_CHANGING
315 315 * - Indicates that tq_nthreads != tq_nthreads_target.
316 316 *
317 317 * * tq_flags & TASKQ_THREAD_CREATED
318 318 * - Indicates that a thread is being created in the taskq.
319 319 *
320 320 * During creation, tq_nthreads and tq_active are set to 0, and
321 321 * tq_nthreads_target is set to the number of threads desired. The
322 322 * TASKQ_CHANGING flag is set, and taskq_thread_create() is called to
323 323 * create the first thread. taskq_thread_create() increments tq_active,
324 324 * sets TASKQ_THREAD_CREATED, and creates the new thread.
325 325 *
326 326 * Each thread starts in taskq_thread(), clears the TASKQ_THREAD_CREATED
327 327 * flag, and increments tq_nthreads. It stores the new value of
328 328 * tq_nthreads as its "thread_id", and stores its thread pointer in the
 329 329 * tq_threadlist at index (thread_id - 1). We keep the thread_id space
330 330 * densely packed by requiring that only the largest thread_id can exit during
331 331 * normal adjustment. The exception is during the destruction of the
332 332 * taskq; once tq_nthreads_target is set to zero, no new threads will be created
333 333 * for the taskq queue, so every thread can exit without any ordering being
334 334 * necessary.
335 335 *
336 336 * Threads will only process work if their thread id is <= tq_nthreads_target.
337 337 *
338 338 * When TASKQ_CHANGING is set, threads will check the current thread target
339 339 * whenever they wake up, and do whatever they can to apply its effects.
340 340 *
 341 341 * TASKQ_THREADS_CPU_PCT -------------------------------------------------------
342 342 *
 343 343 * When a taskq is created with TASKQ_THREADS_CPU_PCT, we store its requested
 344 344 * percentage in tq_threads_ncpus_pct, start it off with the correct thread
 345 345 * target, and add it to the taskq_cpupct_list for later adjustment.
346 346 *
347 347 * We register taskq_cpu_setup() to be called whenever a CPU changes state. It
 348 348 * walks the list of TASKQ_THREADS_CPU_PCT taskqs, adjusts their nthreads_target
349 349 * if need be, and wakes up all of the threads to process the change.
350 350 *
351 351 * Dynamic Task Queues Implementation ------------------------------------------
352 352 *
 353 353 * For dynamic task queues there is a 1-to-1 mapping between a thread and a
 354 354 * taskq_ent_t structure. Each entry is serviced by its own thread and each thread
355 355 * is controlled by a single entry.
356 356 *
 357 357 * Entries are distributed over a set of buckets. To avoid using modulo
 358 358 * arithmetic the number of buckets is 2^n and is determined as the nearest
 359 359 * power of two round-down of the number of CPUs in the system. The tunable
360 360 * variable 'taskq_maxbuckets' limits the maximum number of buckets. Each entry
361 361 * is attached to a bucket for its lifetime and can't migrate to other buckets.
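 *	For example (illustrative only): on a 6-CPU system a dynamic taskq gets
 *	4 buckets (the largest power of two that is <= 6), while on a 256-CPU
 *	system it gets 128 buckets, since taskq_maxbuckets (default 128) caps
 *	the count.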
362 362 *
363 363 * Entries that have scheduled tasks are not placed in any list. The dispatch
364 364 * function sets their "func" and "arg" fields and signals the corresponding
365 365 * thread to execute the task. Once the thread executes the task it clears the
 366 366 * "func" field and places the entry on the bucket cache of free entries pointed
 367 367 * to by the "tqbucket_freelist" field. ALL entries on the free list should have
 368 368 * the "func" field equal to NULL. The free list is a circular doubly-linked list identical
369 369 * in structure to the tq_task list above, but entries are taken from it in LIFO
370 370 * order - the last freed entry is the first to be allocated. The
371 371 * taskq_bucket_dispatch() function gets the most recently used entry from the
372 372 * free list, sets its "func" and "arg" fields and signals a worker thread.
373 373 *
374 374 * After executing each task a per-entry thread taskq_d_thread() places its
375 375 * entry on the bucket free list and goes to a timed sleep. If it wakes up
 376 376 * without getting a new task it removes the entry from the free list and destroys
377 377 * itself. The thread sleep time is controlled by a tunable variable
378 378 * `taskq_thread_timeout'.
379 379 *
 380 380 * There are various statistics kept in the bucket which allow for later
381 381 * analysis of taskq usage patterns. Also, a global copy of taskq creation and
382 382 * death statistics is kept in the global taskq data structure. Since thread
383 383 * creation and death happen rarely, updating such global data does not present
384 384 * a performance problem.
385 385 *
386 386 * NOTE: Threads are not bound to any CPU and there is absolutely no association
387 387 * between the bucket and actual thread CPU, so buckets are used only to
388 388 * split resources and reduce resource contention. Having threads attached
 389 389 * to the CPU denoted by a bucket may reduce the number of times the job
390 390 * switches between CPUs.
391 391 *
 392 392 * The current algorithm creates a thread whenever a bucket has no free
 393 393 * entries. It would be nice to know how many threads are in the running
 394 394 * state and not create threads if all CPUs are busy with existing
 395 395 * tasks, but it is unclear how such a strategy can be implemented.
396 396 *
 397 397 * Currently buckets are created statically as an array attached to the task
 398 398 * queue. On systems with nCPUs < max_ncpus this may waste system
 399 399 * memory. One solution may be to allocate buckets when they are first
400 400 * touched, but it is not clear how useful it is.
401 401 *
402 402 * SUSPEND/RESUME implementation -----------------------------------------------
403 403 *
404 404 * Before executing a task taskq_thread() (executing non-dynamic task
405 405 * queues) obtains taskq's thread lock as a reader. The taskq_suspend()
406 406 * function gets the same lock as a writer blocking all non-dynamic task
407 407 * execution. The taskq_resume() function releases the lock allowing
408 408 * taskq_thread to continue execution.
409 409 *
410 410 * For dynamic task queues, each bucket is marked as TQBUCKET_SUSPEND by
411 411 * taskq_suspend() function. After that taskq_bucket_dispatch() always
412 412 * fails, so that taskq_dispatch() will either enqueue tasks for a
413 413 * suspended backing queue or fail if TQ_NOQUEUE is specified in dispatch
414 414 * flags.
415 415 *
416 416 * NOTE: taskq_suspend() does not immediately block any tasks already
417 417 * scheduled for dynamic task queues. It only suspends new tasks
418 418 * scheduled after taskq_suspend() was called.
419 419 *
 420 420 * The taskq_member() function works by comparing a thread's t_taskq pointer
 421 421 * with the passed taskq pointer.
422 422 *
423 423 * LOCKS and LOCK Hierarchy ----------------------------------------------------
424 424 *
425 425 * There are three locks used in task queues:
426 426 *
427 427 * 1) The taskq_t's tq_lock, protecting global task queue state.
428 428 *
429 429 * 2) Each per-CPU bucket has a lock for bucket management.
430 430 *
431 431 * 3) The global taskq_cpupct_lock, which protects the list of
432 432 * TASKQ_THREADS_CPU_PCT taskqs.
433 433 *
434 434 * If both (1) and (2) are needed, tq_lock should be taken *after* the bucket
435 435 * lock.
436 436 *
437 437 * If both (1) and (3) are needed, tq_lock should be taken *after*
438 438 * taskq_cpupct_lock.
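 *
 * A minimal sketch of the resulting acquisition order when both a bucket
 * lock and tq_lock are needed (names as used elsewhere in this file):
 *
 *	mutex_enter(&bucket->tqbucket_lock);	(bucket lock first)
 *	mutex_enter(&tq->tq_lock);		(then tq_lock)
 *	...
 *	mutex_exit(&tq->tq_lock);
 *	mutex_exit(&bucket->tqbucket_lock);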
439 439 *
440 440 * DEBUG FACILITIES ------------------------------------------------------------
441 441 *
442 442 * For DEBUG kernels it is possible to induce random failures to
443 443 * taskq_dispatch() function when it is given TQ_NOSLEEP argument. The value of
444 444 * taskq_dmtbf and taskq_smtbf tunables control the mean time between induced
445 445 * failures for dynamic and static task queues respectively.
446 446 *
447 447 * Setting TASKQ_STATISTIC to 0 will disable per-bucket statistics.
448 448 *
449 449 * TUNABLES --------------------------------------------------------------------
450 450 *
451 451 * system_taskq_size - Size of the global system_taskq.
452 452 * This value is multiplied by nCPUs to determine
453 453 * actual size.
454 454 * Default value: 64
455 455 *
456 456 * taskq_minimum_nthreads_max
457 457 * - Minimum size of the thread list for a taskq.
458 458 * Useful for testing different thread pool
459 459 * sizes by overwriting tq_nthreads_target.
460 460 *
461 461 * taskq_thread_timeout - Maximum idle time for taskq_d_thread()
462 462 * Default value: 5 minutes
463 463 *
464 464 * taskq_maxbuckets - Maximum number of buckets in any task queue
465 465 * Default value: 128
466 466 *
467 467 * taskq_search_depth - Maximum # of buckets searched for a free entry
468 468 * Default value: 4
469 469 *
470 470 * taskq_dmtbf - Mean time between induced dispatch failures
471 471 * for dynamic task queues.
472 472 * Default value: UINT_MAX (no induced failures)
473 473 *
474 474 * taskq_smtbf - Mean time between induced dispatch failures
475 475 * for static task queues.
476 476 * Default value: UINT_MAX (no induced failures)
477 477 *
478 478 * CONDITIONAL compilation -----------------------------------------------------
479 479 *
480 480 * TASKQ_STATISTIC - If set will enable bucket statistic (default).
481 481 *
482 482 */
483 483
484 484 #include <sys/taskq_impl.h>
485 485 #include <sys/thread.h>
486 486 #include <sys/proc.h>
487 487 #include <sys/kmem.h>
488 488 #include <sys/vmem.h>
489 489 #include <sys/callb.h>
490 490 #include <sys/class.h>
491 491 #include <sys/systm.h>
492 492 #include <sys/cmn_err.h>
493 493 #include <sys/debug.h>
494 494 #include <sys/vmsystm.h> /* For throttlefree */
495 495 #include <sys/sysmacros.h>
496 496 #include <sys/cpuvar.h>
497 497 #include <sys/cpupart.h>
498 498 #include <sys/sdt.h>
499 499 #include <sys/sysdc.h>
500 500 #include <sys/note.h>
501 501
502 502 static kmem_cache_t *taskq_ent_cache, *taskq_cache;
503 503
504 504 /*
505 505 * Pseudo instance numbers for taskqs without explicitly provided instance.
506 506 */
507 507 static vmem_t *taskq_id_arena;
508 508
509 509 /* Global system task queue for common use */
510 510 taskq_t *system_taskq;
511 511
512 512 /*
513 513 * Maximum number of entries in global system taskq is
514 514 * system_taskq_size * max_ncpus
515 515 */
516 516 #define SYSTEM_TASKQ_SIZE 64
517 517 int system_taskq_size = SYSTEM_TASKQ_SIZE;
518 518
519 519 /*
520 520 * Minimum size for tq_nthreads_max; useful for those who want to play around
521 521 * with increasing a taskq's tq_nthreads_target.
522 522 */
523 523 int taskq_minimum_nthreads_max = 1;
524 524
525 525 /*
526 526 * We want to ensure that when taskq_create() returns, there is at least
527 527 * one thread ready to handle requests. To guarantee this, we have to wait
528 528 * for the second thread, since the first one cannot process requests until
529 529 * the second thread has been created.
530 530 */
531 531 #define TASKQ_CREATE_ACTIVE_THREADS 2
532 532
533 533 /* Maximum percentage allowed for TASKQ_THREADS_CPU_PCT */
534 534 #define TASKQ_CPUPCT_MAX_PERCENT 1000
535 535 int taskq_cpupct_max_percent = TASKQ_CPUPCT_MAX_PERCENT;
536 536
537 537 /*
538 538 * Dynamic task queue threads that don't get any work within
539 539 * taskq_thread_timeout destroy themselves
540 540 */
541 541 #define TASKQ_THREAD_TIMEOUT (60 * 5)
542 542 int taskq_thread_timeout = TASKQ_THREAD_TIMEOUT;
543 543
544 544 #define TASKQ_MAXBUCKETS 128
545 545 int taskq_maxbuckets = TASKQ_MAXBUCKETS;
546 546
547 547 /*
548 548 * When a bucket has no available entries another buckets are tried.
549 549 * taskq_search_depth parameter limits the amount of buckets that we search
550 550 * before failing. This is mostly useful in systems with many CPUs where we may
551 551 * spend too much time scanning busy buckets.
552 552 */
553 553 #define TASKQ_SEARCH_DEPTH 4
554 554 int taskq_search_depth = TASKQ_SEARCH_DEPTH;
555 555
556 556 /*
557 557 * Hashing function: mix various bits of x. May be pretty much anything.
558 558 */
559 559 #define TQ_HASH(x) ((x) ^ ((x) >> 11) ^ ((x) >> 17) ^ ((x) ^ 27))
560 560
561 561 /*
 562 562 * We do not create any new threads when the system is low on memory and starts
 563 563 * throttling memory allocations. The following macro tries to estimate such a
 564 564 * condition.
565 565 */
566 566 #define ENOUGH_MEMORY() (freemem > throttlefree)
567 567
568 568 /*
569 569 * Static functions.
570 570 */
571 571 static taskq_t *taskq_create_common(const char *, int, int, pri_t, int,
572 572 int, proc_t *, uint_t, uint_t);
573 573 static void taskq_thread(void *);
574 574 static void taskq_d_thread(taskq_ent_t *);
575 575 static void taskq_bucket_extend(void *);
576 576 static int taskq_constructor(void *, void *, int);
577 577 static void taskq_destructor(void *, void *);
578 578 static int taskq_ent_constructor(void *, void *, int);
579 579 static void taskq_ent_destructor(void *, void *);
580 580 static taskq_ent_t *taskq_ent_alloc(taskq_t *, int);
581 581 static void taskq_ent_free(taskq_t *, taskq_ent_t *);
582 582 static int taskq_ent_exists(taskq_t *, task_func_t, void *);
583 583 static taskq_ent_t *taskq_bucket_dispatch(taskq_bucket_t *, task_func_t,
584 584 void *);
585 585
586 586 /*
587 587 * Task queues kstats.
588 588 */
589 589 struct taskq_kstat {
590 590 kstat_named_t tq_pid;
591 591 kstat_named_t tq_tasks;
592 592 kstat_named_t tq_executed;
593 593 kstat_named_t tq_maxtasks;
594 594 kstat_named_t tq_totaltime;
595 595 kstat_named_t tq_nalloc;
596 596 kstat_named_t tq_nactive;
597 597 kstat_named_t tq_pri;
598 598 kstat_named_t tq_nthreads;
599 599 } taskq_kstat = {
600 600 { "pid", KSTAT_DATA_UINT64 },
601 601 { "tasks", KSTAT_DATA_UINT64 },
602 602 { "executed", KSTAT_DATA_UINT64 },
603 603 { "maxtasks", KSTAT_DATA_UINT64 },
604 604 { "totaltime", KSTAT_DATA_UINT64 },
605 605 { "nactive", KSTAT_DATA_UINT64 },
606 606 { "nalloc", KSTAT_DATA_UINT64 },
607 607 { "priority", KSTAT_DATA_UINT64 },
608 608 { "threads", KSTAT_DATA_UINT64 },
609 609 };
610 610
611 611 struct taskq_d_kstat {
612 612 kstat_named_t tqd_pri;
613 613 kstat_named_t tqd_btasks;
614 614 kstat_named_t tqd_bexecuted;
615 615 kstat_named_t tqd_bmaxtasks;
616 616 kstat_named_t tqd_bnalloc;
617 617 kstat_named_t tqd_bnactive;
618 618 kstat_named_t tqd_btotaltime;
619 619 kstat_named_t tqd_hits;
620 620 kstat_named_t tqd_misses;
621 621 kstat_named_t tqd_overflows;
622 622 kstat_named_t tqd_tcreates;
623 623 kstat_named_t tqd_tdeaths;
624 624 kstat_named_t tqd_maxthreads;
625 625 kstat_named_t tqd_nomem;
626 626 kstat_named_t tqd_disptcreates;
627 627 kstat_named_t tqd_totaltime;
628 628 kstat_named_t tqd_nalloc;
629 629 kstat_named_t tqd_nfree;
630 630 } taskq_d_kstat = {
631 631 { "priority", KSTAT_DATA_UINT64 },
632 632 { "btasks", KSTAT_DATA_UINT64 },
633 633 { "bexecuted", KSTAT_DATA_UINT64 },
634 634 { "bmaxtasks", KSTAT_DATA_UINT64 },
635 635 { "bnalloc", KSTAT_DATA_UINT64 },
636 636 { "bnactive", KSTAT_DATA_UINT64 },
637 637 { "btotaltime", KSTAT_DATA_UINT64 },
638 638 { "hits", KSTAT_DATA_UINT64 },
639 639 { "misses", KSTAT_DATA_UINT64 },
640 640 { "overflows", KSTAT_DATA_UINT64 },
641 641 { "tcreates", KSTAT_DATA_UINT64 },
642 642 { "tdeaths", KSTAT_DATA_UINT64 },
643 643 { "maxthreads", KSTAT_DATA_UINT64 },
644 644 { "nomem", KSTAT_DATA_UINT64 },
645 645 { "disptcreates", KSTAT_DATA_UINT64 },
646 646 { "totaltime", KSTAT_DATA_UINT64 },
647 647 { "nalloc", KSTAT_DATA_UINT64 },
648 648 { "nfree", KSTAT_DATA_UINT64 },
649 649 };
650 650
651 651 static kmutex_t taskq_kstat_lock;
652 652 static kmutex_t taskq_d_kstat_lock;
653 653 static int taskq_kstat_update(kstat_t *, int);
654 654 static int taskq_d_kstat_update(kstat_t *, int);
655 655
656 656 /*
657 657 * List of all TASKQ_THREADS_CPU_PCT taskqs.
658 658 */
659 659 static list_t taskq_cpupct_list; /* protected by cpu_lock */
660 660
661 661 /*
662 662 * Collect per-bucket statistic when TASKQ_STATISTIC is defined.
663 663 */
664 664 #define TASKQ_STATISTIC 1
665 665
666 666 #if TASKQ_STATISTIC
667 667 #define TQ_STAT(b, x) b->tqbucket_stat.x++
668 668 #else
669 669 #define TQ_STAT(b, x)
670 670 #endif
671 671
672 672 /*
673 673 * Random fault injection.
674 674 */
675 675 uint_t taskq_random;
676 676 uint_t taskq_dmtbf = UINT_MAX; /* mean time between injected failures */
677 677 uint_t taskq_smtbf = UINT_MAX; /* mean time between injected failures */
678 678
679 679 /*
680 680 * TQ_NOSLEEP dispatches on dynamic task queues are always allowed to fail.
681 681 *
682 682 * TQ_NOSLEEP dispatches on static task queues can't arbitrarily fail because
683 683 * they could prepopulate the cache and make sure that they do not use more
 684 684 * than minalloc entries. So, fault injection in this case ensures that
685 685 * either TASKQ_PREPOPULATE is not set or there are more entries allocated
686 686 * than is specified by minalloc. TQ_NOALLOC dispatches are always allowed
687 687 * to fail, but for simplicity we treat them identically to TQ_NOSLEEP
688 688 * dispatches.
689 689 */
690 690 #ifdef DEBUG
691 691 #define TASKQ_D_RANDOM_DISPATCH_FAILURE(tq, flag) \
692 692 taskq_random = (taskq_random * 2416 + 374441) % 1771875;\
693 693 if ((flag & TQ_NOSLEEP) && \
694 694 taskq_random < 1771875 / taskq_dmtbf) { \
695 695 return (NULL); \
696 696 }
697 697
698 698 #define TASKQ_S_RANDOM_DISPATCH_FAILURE(tq, flag) \
699 699 taskq_random = (taskq_random * 2416 + 374441) % 1771875;\
700 700 if ((flag & (TQ_NOSLEEP | TQ_NOALLOC)) && \
701 701 (!(tq->tq_flags & TASKQ_PREPOPULATE) || \
702 702 (tq->tq_nalloc > tq->tq_minalloc)) && \
703 703 (taskq_random < (1771875 / taskq_smtbf))) { \
704 704 mutex_exit(&tq->tq_lock); \
705 705 return (NULL); \
706 706 }
707 707 #else
708 708 #define TASKQ_S_RANDOM_DISPATCH_FAILURE(tq, flag)
709 709 #define TASKQ_D_RANDOM_DISPATCH_FAILURE(tq, flag)
710 710 #endif
711 711
712 712 #define IS_EMPTY(l) (((l).tqent_prev == (l).tqent_next) && \
713 713 ((l).tqent_prev == &(l)))
714 714
715 715 /*
 716 716 * Append `tqe' at the end of the doubly-linked list denoted by l.
717 717 */
718 718 #define TQ_APPEND(l, tqe) { \
719 719 tqe->tqent_next = &l; \
720 720 tqe->tqent_prev = l.tqent_prev; \
721 721 tqe->tqent_next->tqent_prev = tqe; \
722 722 tqe->tqent_prev->tqent_next = tqe; \
723 723 }
724 724 /*
725 725 * Prepend 'tqe' to the beginning of l
726 726 */
727 727 #define TQ_PREPEND(l, tqe) { \
728 728 tqe->tqent_next = l.tqent_next; \
729 729 tqe->tqent_prev = &l; \
730 730 tqe->tqent_next->tqent_prev = tqe; \
731 731 tqe->tqent_prev->tqent_next = tqe; \
732 732 }
733 733
734 734 /*
735 735 * Schedule a task specified by func and arg into the task queue entry tqe.
736 736 */
737 737 #define TQ_DO_ENQUEUE(tq, tqe, func, arg, front) { \
738 738 ASSERT(MUTEX_HELD(&tq->tq_lock)); \
739 739 _NOTE(CONSTCOND) \
740 740 if (front) { \
741 741 TQ_PREPEND(tq->tq_task, tqe); \
742 742 } else { \
743 743 TQ_APPEND(tq->tq_task, tqe); \
744 744 } \
745 745 tqe->tqent_func = (func); \
746 746 tqe->tqent_arg = (arg); \
747 747 tq->tq_tasks++; \
748 748 if (tq->tq_tasks - tq->tq_executed > tq->tq_maxtasks) \
749 749 tq->tq_maxtasks = tq->tq_tasks - tq->tq_executed; \
750 750 cv_signal(&tq->tq_dispatch_cv); \
751 751 DTRACE_PROBE2(taskq__enqueue, taskq_t *, tq, taskq_ent_t *, tqe); \
752 752 }
753 753
754 754 #define TQ_ENQUEUE(tq, tqe, func, arg) \
755 755 TQ_DO_ENQUEUE(tq, tqe, func, arg, 0)
756 756
757 757 #define TQ_ENQUEUE_FRONT(tq, tqe, func, arg) \
758 758 TQ_DO_ENQUEUE(tq, tqe, func, arg, 1)
759 759
760 760 /*
761 761 * Do-nothing task which may be used to prepopulate thread caches.
762 762 */
763 763 /*ARGSUSED*/
764 764 void
765 765 nulltask(void *unused)
766 766 {
767 767 }
768 768
769 769 /*ARGSUSED*/
770 770 static int
771 771 taskq_constructor(void *buf, void *cdrarg, int kmflags)
772 772 {
773 773 taskq_t *tq = buf;
774 774
775 775 bzero(tq, sizeof (taskq_t));
776 776
777 777 mutex_init(&tq->tq_lock, NULL, MUTEX_DEFAULT, NULL);
778 778 rw_init(&tq->tq_threadlock, NULL, RW_DEFAULT, NULL);
779 779 cv_init(&tq->tq_dispatch_cv, NULL, CV_DEFAULT, NULL);
780 780 cv_init(&tq->tq_exit_cv, NULL, CV_DEFAULT, NULL);
781 781 cv_init(&tq->tq_wait_cv, NULL, CV_DEFAULT, NULL);
782 782 cv_init(&tq->tq_maxalloc_cv, NULL, CV_DEFAULT, NULL);
783 783
784 784 tq->tq_task.tqent_next = &tq->tq_task;
785 785 tq->tq_task.tqent_prev = &tq->tq_task;
786 786
787 787 return (0);
788 788 }
789 789
790 790 /*ARGSUSED*/
791 791 static void
792 792 taskq_destructor(void *buf, void *cdrarg)
793 793 {
794 794 taskq_t *tq = buf;
795 795
796 796 ASSERT(tq->tq_nthreads == 0);
797 797 ASSERT(tq->tq_buckets == NULL);
798 798 ASSERT(tq->tq_tcreates == 0);
799 799 ASSERT(tq->tq_tdeaths == 0);
800 800
801 801 mutex_destroy(&tq->tq_lock);
802 802 rw_destroy(&tq->tq_threadlock);
803 803 cv_destroy(&tq->tq_dispatch_cv);
804 804 cv_destroy(&tq->tq_exit_cv);
805 805 cv_destroy(&tq->tq_wait_cv);
806 806 cv_destroy(&tq->tq_maxalloc_cv);
807 807 }
808 808
809 809 /*ARGSUSED*/
810 810 static int
811 811 taskq_ent_constructor(void *buf, void *cdrarg, int kmflags)
812 812 {
813 813 taskq_ent_t *tqe = buf;
814 814
815 815 tqe->tqent_thread = NULL;
816 816 cv_init(&tqe->tqent_cv, NULL, CV_DEFAULT, NULL);
817 817
818 818 return (0);
819 819 }
820 820
821 821 /*ARGSUSED*/
822 822 static void
823 823 taskq_ent_destructor(void *buf, void *cdrarg)
824 824 {
825 825 taskq_ent_t *tqe = buf;
826 826
827 827 ASSERT(tqe->tqent_thread == NULL);
828 828 cv_destroy(&tqe->tqent_cv);
829 829 }
830 830
831 831 void
832 832 taskq_init(void)
833 833 {
834 834 taskq_ent_cache = kmem_cache_create("taskq_ent_cache",
835 835 sizeof (taskq_ent_t), 0, taskq_ent_constructor,
836 836 taskq_ent_destructor, NULL, NULL, NULL, 0);
837 837 taskq_cache = kmem_cache_create("taskq_cache", sizeof (taskq_t),
838 838 0, taskq_constructor, taskq_destructor, NULL, NULL, NULL, 0);
839 839 taskq_id_arena = vmem_create("taskq_id_arena",
840 840 (void *)1, INT32_MAX, 1, NULL, NULL, NULL, 0,
841 841 VM_SLEEP | VMC_IDENTIFIER);
842 842
843 843 list_create(&taskq_cpupct_list, sizeof (taskq_t),
844 844 offsetof(taskq_t, tq_cpupct_link));
845 845 }
846 846
847 847 static void
848 848 taskq_update_nthreads(taskq_t *tq, uint_t ncpus)
849 849 {
850 850 uint_t newtarget = TASKQ_THREADS_PCT(ncpus, tq->tq_threads_ncpus_pct);
851 851
852 852 ASSERT(MUTEX_HELD(&cpu_lock));
853 853 ASSERT(MUTEX_HELD(&tq->tq_lock));
854 854
855 855 /* We must be going from non-zero to non-zero; no exiting. */
856 856 ASSERT3U(tq->tq_nthreads_target, !=, 0);
857 857 ASSERT3U(newtarget, !=, 0);
858 858
859 859 ASSERT3U(newtarget, <=, tq->tq_nthreads_max);
860 860 if (newtarget != tq->tq_nthreads_target) {
861 861 tq->tq_flags |= TASKQ_CHANGING;
862 862 tq->tq_nthreads_target = newtarget;
863 863 cv_broadcast(&tq->tq_dispatch_cv);
864 864 cv_broadcast(&tq->tq_exit_cv);
865 865 }
866 866 }
867 867
868 868 /* called during task queue creation */
869 869 static void
870 870 taskq_cpupct_install(taskq_t *tq, cpupart_t *cpup)
871 871 {
872 872 ASSERT(tq->tq_flags & TASKQ_THREADS_CPU_PCT);
873 873
874 874 mutex_enter(&cpu_lock);
875 875 mutex_enter(&tq->tq_lock);
876 876 tq->tq_cpupart = cpup->cp_id;
877 877 taskq_update_nthreads(tq, cpup->cp_ncpus);
878 878 mutex_exit(&tq->tq_lock);
879 879
880 880 list_insert_tail(&taskq_cpupct_list, tq);
881 881 mutex_exit(&cpu_lock);
882 882 }
883 883
884 884 static void
885 885 taskq_cpupct_remove(taskq_t *tq)
886 886 {
887 887 ASSERT(tq->tq_flags & TASKQ_THREADS_CPU_PCT);
888 888
889 889 mutex_enter(&cpu_lock);
890 890 list_remove(&taskq_cpupct_list, tq);
891 891 mutex_exit(&cpu_lock);
892 892 }
893 893
894 894 /*ARGSUSED*/
895 895 static int
896 896 taskq_cpu_setup(cpu_setup_t what, int id, void *arg)
897 897 {
898 898 taskq_t *tq;
899 899 cpupart_t *cp = cpu[id]->cpu_part;
900 900 uint_t ncpus = cp->cp_ncpus;
901 901
902 902 ASSERT(MUTEX_HELD(&cpu_lock));
903 903 ASSERT(ncpus > 0);
904 904
905 905 switch (what) {
906 906 case CPU_OFF:
907 907 case CPU_CPUPART_OUT:
908 908 /* offlines are called *before* the cpu is offlined. */
909 909 if (ncpus > 1)
910 910 ncpus--;
911 911 break;
912 912
913 913 case CPU_ON:
914 914 case CPU_CPUPART_IN:
915 915 break;
916 916
917 917 default:
918 918 return (0); /* doesn't affect cpu count */
919 919 }
920 920
921 921 for (tq = list_head(&taskq_cpupct_list); tq != NULL;
922 922 tq = list_next(&taskq_cpupct_list, tq)) {
923 923
924 924 mutex_enter(&tq->tq_lock);
925 925 /*
926 926 * If the taskq is part of the cpuset which is changing,
927 927 * update its nthreads_target.
928 928 */
929 929 if (tq->tq_cpupart == cp->cp_id) {
930 930 taskq_update_nthreads(tq, ncpus);
931 931 }
932 932 mutex_exit(&tq->tq_lock);
933 933 }
934 934 return (0);
935 935 }
936 936
937 937 void
938 938 taskq_mp_init(void)
939 939 {
940 940 mutex_enter(&cpu_lock);
941 941 register_cpu_setup_func(taskq_cpu_setup, NULL);
942 942 /*
943 943 * Make sure we're up to date. At this point in boot, there is only
944 944 * one processor set, so we only have to update the current CPU.
945 945 */
946 946 (void) taskq_cpu_setup(CPU_ON, CPU->cpu_id, NULL);
947 947 mutex_exit(&cpu_lock);
948 948 }
949 949
950 950 /*
951 951 * Create global system dynamic task queue.
952 952 */
953 953 void
954 954 system_taskq_init(void)
955 955 {
956 956 system_taskq = taskq_create_common("system_taskq", 0,
957 957 system_taskq_size * max_ncpus, minclsyspri, 4, 512, &p0, 0,
958 958 TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
959 959 }
960 960
961 961 /*
962 962 * taskq_ent_alloc()
963 963 *
964 964 * Allocates a new taskq_ent_t structure either from the free list or from the
965 965 * cache. Returns NULL if it can't be allocated.
966 966 *
967 967 * Assumes: tq->tq_lock is held.
968 968 */
969 969 static taskq_ent_t *
970 970 taskq_ent_alloc(taskq_t *tq, int flags)
971 971 {
972 972 int kmflags = (flags & TQ_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
973 973 taskq_ent_t *tqe;
974 974 clock_t wait_time;
975 975 clock_t wait_rv;
976 976
977 977 ASSERT(MUTEX_HELD(&tq->tq_lock));
978 978
979 979 /*
980 980 * TQ_NOALLOC allocations are allowed to use the freelist, even if
981 981 * we are below tq_minalloc.
982 982 */
983 983 again: if ((tqe = tq->tq_freelist) != NULL &&
984 984 ((flags & TQ_NOALLOC) || tq->tq_nalloc >= tq->tq_minalloc)) {
985 985 tq->tq_freelist = tqe->tqent_next;
986 986 } else {
987 987 if (flags & TQ_NOALLOC)
988 988 return (NULL);
989 989
990 990 if (tq->tq_nalloc >= tq->tq_maxalloc) {
991 991 if (kmflags & KM_NOSLEEP)
992 992 return (NULL);
993 993
994 994 /*
995 995 * We don't want to exceed tq_maxalloc, but we can't
996 996 * wait for other tasks to complete (and thus free up
997 997 * task structures) without risking deadlock with
998 998 * the caller. So, we just delay for one second
999 999 * to throttle the allocation rate. If we have tasks
1000 1000 * complete before one second timeout expires then
1001 1001 * taskq_ent_free will signal us and we will
1002 1002 * immediately retry the allocation (reap free).
1003 1003 */
1004 1004 wait_time = ddi_get_lbolt() + hz;
1005 1005 while (tq->tq_freelist == NULL) {
1006 1006 tq->tq_maxalloc_wait++;
1007 1007 wait_rv = cv_timedwait(&tq->tq_maxalloc_cv,
1008 1008 &tq->tq_lock, wait_time);
1009 1009 tq->tq_maxalloc_wait--;
1010 1010 if (wait_rv == -1)
1011 1011 break;
1012 1012 }
1013 1013 if (tq->tq_freelist)
1014 1014 goto again; /* reap freelist */
1015 1015
1016 1016 }
1017 1017 mutex_exit(&tq->tq_lock);
1018 1018
1019 1019 tqe = kmem_cache_alloc(taskq_ent_cache, kmflags);
1020 1020
1021 1021 mutex_enter(&tq->tq_lock);
1022 1022 if (tqe != NULL)
1023 1023 tq->tq_nalloc++;
1024 1024 }
1025 1025 return (tqe);
1026 1026 }
1027 1027
1028 1028 /*
1029 1029 * taskq_ent_free()
1030 1030 *
1031 1031 * Free taskq_ent_t structure by either putting it on the free list or freeing
1032 1032 * it to the cache.
1033 1033 *
1034 1034 * Assumes: tq->tq_lock is held.
1035 1035 */
1036 1036 static void
1037 1037 taskq_ent_free(taskq_t *tq, taskq_ent_t *tqe)
1038 1038 {
1039 1039 ASSERT(MUTEX_HELD(&tq->tq_lock));
1040 1040
1041 1041 if (tq->tq_nalloc <= tq->tq_minalloc) {
1042 1042 tqe->tqent_next = tq->tq_freelist;
1043 1043 tq->tq_freelist = tqe;
1044 1044 } else {
1045 1045 tq->tq_nalloc--;
1046 1046 mutex_exit(&tq->tq_lock);
1047 1047 kmem_cache_free(taskq_ent_cache, tqe);
1048 1048 mutex_enter(&tq->tq_lock);
1049 1049 }
1050 1050
1051 1051 if (tq->tq_maxalloc_wait)
1052 1052 cv_signal(&tq->tq_maxalloc_cv);
1053 1053 }
1054 1054
1055 1055 /*
1056 1056 * taskq_ent_exists()
1057 1057 *
1058 1058 * Return 1 if taskq already has entry for calling 'func(arg)'.
1059 1059 *
1060 1060 * Assumes: tq->tq_lock is held.
1061 1061 */
1062 1062 static int
1063 1063 taskq_ent_exists(taskq_t *tq, task_func_t func, void *arg)
1064 1064 {
1065 1065 taskq_ent_t *tqe;
1066 1066
1067 1067 ASSERT(MUTEX_HELD(&tq->tq_lock));
1068 1068
1069 1069 for (tqe = tq->tq_task.tqent_next; tqe != &tq->tq_task;
1070 1070 tqe = tqe->tqent_next)
1071 1071 if ((tqe->tqent_func == func) && (tqe->tqent_arg == arg))
1072 1072 return (1);
1073 1073 return (0);
1074 1074 }
1075 1075
1076 1076 /*
1077 1077 * Dispatch a task "func(arg)" to a free entry of bucket b.
1078 1078 *
 1079 1079 * Assumes: no bucket locks are held.
1080 1080 *
1081 1081 * Returns: a pointer to an entry if dispatch was successful.
1082 1082 * NULL if there are no free entries or if the bucket is suspended.
1083 1083 */
1084 1084 static taskq_ent_t *
1085 1085 taskq_bucket_dispatch(taskq_bucket_t *b, task_func_t func, void *arg)
1086 1086 {
1087 1087 taskq_ent_t *tqe;
1088 1088
1089 1089 ASSERT(MUTEX_NOT_HELD(&b->tqbucket_lock));
1090 1090 ASSERT(func != NULL);
1091 1091
1092 1092 mutex_enter(&b->tqbucket_lock);
1093 1093
1094 1094 ASSERT(b->tqbucket_nfree != 0 || IS_EMPTY(b->tqbucket_freelist));
1095 1095 ASSERT(b->tqbucket_nfree == 0 || !IS_EMPTY(b->tqbucket_freelist));
1096 1096
1097 1097 /*
 1098 1098 * Get an entry from the freelist if there is one.
1099 1099 * Schedule task into the entry.
1100 1100 */
1101 1101 if ((b->tqbucket_nfree != 0) &&
1102 1102 !(b->tqbucket_flags & TQBUCKET_SUSPEND)) {
1103 1103 tqe = b->tqbucket_freelist.tqent_prev;
1104 1104
1105 1105 ASSERT(tqe != &b->tqbucket_freelist);
1106 1106 ASSERT(tqe->tqent_thread != NULL);
1107 1107
1108 1108 tqe->tqent_prev->tqent_next = tqe->tqent_next;
1109 1109 tqe->tqent_next->tqent_prev = tqe->tqent_prev;
1110 1110 b->tqbucket_nalloc++;
1111 1111 b->tqbucket_nfree--;
1112 1112 tqe->tqent_func = func;
1113 1113 tqe->tqent_arg = arg;
1114 1114 TQ_STAT(b, tqs_hits);
1115 1115 cv_signal(&tqe->tqent_cv);
1116 1116 DTRACE_PROBE2(taskq__d__enqueue, taskq_bucket_t *, b,
1117 1117 taskq_ent_t *, tqe);
1118 1118 } else {
1119 1119 tqe = NULL;
1120 1120 TQ_STAT(b, tqs_misses);
1121 1121 }
1122 1122 mutex_exit(&b->tqbucket_lock);
1123 1123 return (tqe);
1124 1124 }
1125 1125
1126 1126 /*
1127 1127 * Dispatch a task.
1128 1128 *
1129 1129 * Assumes: func != NULL
1130 1130 *
1131 1131 * Returns: NULL if dispatch failed.
1132 1132 * non-NULL if task dispatched successfully.
1133 1133 * Actual return value is the pointer to taskq entry that was used to
1134 1134 * dispatch a task. This is useful for debugging.
1135 1135 */
1136 1136 taskqid_t
1137 1137 taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
1138 1138 {
1139 1139 taskq_bucket_t *bucket = NULL; /* Which bucket needs extension */
1140 1140 taskq_ent_t *tqe = NULL;
1141 1141 taskq_ent_t *tqe1;
1142 1142 uint_t bsize;
1143 1143
1144 1144 ASSERT(tq != NULL);
1145 1145 ASSERT(func != NULL);
1146 1146
1147 1147 if (!(tq->tq_flags & TASKQ_DYNAMIC)) {
1148 1148 /*
1149 1149 * TQ_NOQUEUE flag can't be used with non-dynamic task queues.
1150 1150 */
1151 1151 ASSERT(!(flags & TQ_NOQUEUE));
1152 1152 /*
1153 1153 * Enqueue the task to the underlying queue.
1154 1154 */
1155 1155 mutex_enter(&tq->tq_lock);
1156 1156
1157 1157 TASKQ_S_RANDOM_DISPATCH_FAILURE(tq, flags);
1158 1158
1159 1159 if ((tqe = taskq_ent_alloc(tq, flags)) == NULL) {
1160 1160 mutex_exit(&tq->tq_lock);
1161 1161 return (NULL);
1162 1162 }
1163 1163 /* Make sure we start without any flags */
1164 1164 tqe->tqent_un.tqent_flags = 0;
1165 1165
1166 1166 if (flags & TQ_FRONT) {
1167 1167 TQ_ENQUEUE_FRONT(tq, tqe, func, arg);
1168 1168 } else {
1169 1169 TQ_ENQUEUE(tq, tqe, func, arg);
1170 1170 }
1171 1171 mutex_exit(&tq->tq_lock);
1172 1172 return ((taskqid_t)tqe);
1173 1173 }
1174 1174
1175 1175 /*
1176 1176 * Dynamic taskq dispatching.
1177 1177 */
1178 1178 ASSERT(!(flags & (TQ_NOALLOC | TQ_FRONT)));
1179 1179 TASKQ_D_RANDOM_DISPATCH_FAILURE(tq, flags);
1180 1180
1181 1181 bsize = tq->tq_nbuckets;
1182 1182
1183 1183 if (bsize == 1) {
1184 1184 /*
1185 1185 * In a single-CPU case there is only one bucket, so get
 1186 1186 * the entry directly from there.
1187 1187 */
1188 1188 if ((tqe = taskq_bucket_dispatch(tq->tq_buckets, func, arg))
1189 1189 != NULL)
1190 1190 return ((taskqid_t)tqe); /* Fastpath */
1191 1191 bucket = tq->tq_buckets;
1192 1192 } else {
1193 1193 int loopcount;
1194 1194 taskq_bucket_t *b;
1195 1195 uintptr_t h = ((uintptr_t)CPU + (uintptr_t)arg) >> 3;
1196 1196
1197 1197 h = TQ_HASH(h);
1198 1198
1199 1199 /*
1200 1200 * The 'bucket' points to the original bucket that we hit. If we
1201 1201 * can't allocate from it, we search other buckets, but only
1202 1202 * extend this one.
1203 1203 */
1204 1204 b = &tq->tq_buckets[h & (bsize - 1)];
1205 1205 ASSERT(b->tqbucket_taskq == tq); /* Sanity check */
1206 1206
1207 1207 /*
1208 1208 * Do a quick check before grabbing the lock. If the bucket does
1209 1209 * not have free entries now, chances are very small that it
1210 1210 * will after we take the lock, so we just skip it.
1211 1211 */
1212 1212 if (b->tqbucket_nfree != 0) {
1213 1213 if ((tqe = taskq_bucket_dispatch(b, func, arg)) != NULL)
1214 1214 return ((taskqid_t)tqe); /* Fastpath */
1215 1215 } else {
1216 1216 TQ_STAT(b, tqs_misses);
1217 1217 }
1218 1218
1219 1219 bucket = b;
1220 1220 loopcount = MIN(taskq_search_depth, bsize);
1221 1221 /*
1222 1222 * If bucket dispatch failed, search loopcount number of buckets
1223 1223 * before we give up and fail.
1224 1224 */
1225 1225 do {
1226 1226 b = &tq->tq_buckets[++h & (bsize - 1)];
1227 1227 ASSERT(b->tqbucket_taskq == tq); /* Sanity check */
1228 1228 loopcount--;
1229 1229
1230 1230 if (b->tqbucket_nfree != 0) {
1231 1231 tqe = taskq_bucket_dispatch(b, func, arg);
1232 1232 } else {
1233 1233 TQ_STAT(b, tqs_misses);
1234 1234 }
1235 1235 } while ((tqe == NULL) && (loopcount > 0));
1236 1236 }
1237 1237
1238 1238 /*
1239 1239 * At this point we either scheduled a task and (tqe != NULL) or failed
 1240 1240 * (tqe == NULL). Try to recover from failure.
1241 1241 */
1242 1242
1243 1243 /*
 1244 1244 * For TQ_SLEEP dispatches, try to extend the bucket and retry dispatch.
1245 1245 */
1246 1246 if ((tqe == NULL) && !(flags & TQ_NOSLEEP)) {
1247 1247 /*
1248 1248 * taskq_bucket_extend() may fail to do anything, but this is
1249 1249 * fine - we deal with it later. If the bucket was successfully
1250 1250 * extended, there is a good chance that taskq_bucket_dispatch()
1251 1251 * will get this new entry, unless someone is racing with us and
1252 1252 * stealing the new entry from under our nose.
1253 1253 * taskq_bucket_extend() may sleep.
1254 1254 */
1255 1255 taskq_bucket_extend(bucket);
1256 1256 TQ_STAT(bucket, tqs_disptcreates);
1257 1257 if ((tqe = taskq_bucket_dispatch(bucket, func, arg)) != NULL)
1258 1258 return ((taskqid_t)tqe);
1259 1259 }
1260 1260
1261 1261 ASSERT(bucket != NULL);
1262 1262
1263 1263 /*
1264 1264 * Since there are not enough free entries in the bucket, add a
 1265 1265 * taskq entry to extend it in the background using the backing queue
1266 1266 * (unless we already have a taskq entry to perform that extension).
1267 1267 */
1268 1268 mutex_enter(&tq->tq_lock);
1269 1269 if (!taskq_ent_exists(tq, taskq_bucket_extend, bucket)) {
1270 1270 if ((tqe1 = taskq_ent_alloc(tq, TQ_NOSLEEP)) != NULL) {
1271 1271 TQ_ENQUEUE_FRONT(tq, tqe1, taskq_bucket_extend, bucket);
1272 1272 } else {
1273 1273 TQ_STAT(bucket, tqs_nomem);
1274 1274 }
1275 1275 }
1276 1276
1277 1277 /*
1278 1278 * Dispatch failed and we can't find an entry to schedule a task.
 1279 1279 * Revert to the backing queue unless TQ_NOQUEUE was requested.
1280 1280 */
1281 1281 if ((tqe == NULL) && !(flags & TQ_NOQUEUE)) {
1282 1282 if ((tqe = taskq_ent_alloc(tq, flags)) != NULL) {
1283 1283 TQ_ENQUEUE(tq, tqe, func, arg);
1284 1284 } else {
1285 1285 TQ_STAT(bucket, tqs_nomem);
1286 1286 }
1287 1287 }
1288 1288 mutex_exit(&tq->tq_lock);
1289 1289
1290 1290 return ((taskqid_t)tqe);
1291 1291 }
1292 1292
1293 1293 void
1294 1294 taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
1295 1295 taskq_ent_t *tqe)
1296 1296 {
1297 1297 ASSERT(func != NULL);
1298 1298 ASSERT(!(tq->tq_flags & TASKQ_DYNAMIC));
1299 1299
1300 1300 /*
1301 1301 * Mark it as a prealloc'd task. This is important
1302 1302 * to ensure that we don't free it later.
1303 1303 */
1304 1304 tqe->tqent_un.tqent_flags |= TQENT_FLAG_PREALLOC;
1305 1305 /*
1306 1306 * Enqueue the task to the underlying queue.
1307 1307 */
1308 1308 mutex_enter(&tq->tq_lock);
1309 1309
1310 1310 if (flags & TQ_FRONT) {
1311 1311 TQ_ENQUEUE_FRONT(tq, tqe, func, arg);
1312 1312 } else {
1313 1313 TQ_ENQUEUE(tq, tqe, func, arg);
1314 1314 }
1315 1315 mutex_exit(&tq->tq_lock);
1316 1316 }
1317 1317
1318 1318 /*
1319 1319 * Wait for all pending tasks to complete.
1320 1320 * Calling taskq_wait from a task will cause deadlock.
1321 1321 */
1322 1322 void
1323 1323 taskq_wait(taskq_t *tq)
1324 1324 {
1325 1325 ASSERT(tq != curthread->t_taskq);
1326 1326
1327 1327 mutex_enter(&tq->tq_lock);
1328 1328 while (tq->tq_task.tqent_next != &tq->tq_task || tq->tq_active != 0)
1329 1329 cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
1330 1330 mutex_exit(&tq->tq_lock);
1331 1331
1332 1332 if (tq->tq_flags & TASKQ_DYNAMIC) {
1333 1333 taskq_bucket_t *b = tq->tq_buckets;
1334 1334 int bid = 0;
1335 1335 for (; (b != NULL) && (bid < tq->tq_nbuckets); b++, bid++) {
1336 1336 mutex_enter(&b->tqbucket_lock);
1337 1337 while (b->tqbucket_nalloc > 0)
1338 1338 cv_wait(&b->tqbucket_cv, &b->tqbucket_lock);
1339 1339 mutex_exit(&b->tqbucket_lock);
1340 1340 }
1341 1341 }
1342 1342 }
1343 1343
1344 1344 /*
1345 1345 * Suspend execution of tasks.
1346 1346 *
1347 1347 * Tasks in the queue part will be suspended immediately upon return from this
1348 1348 * function. Pending tasks in the dynamic part will continue to execute, but all
1349 1349 * new tasks will be suspended.
1350 1350 */
1351 1351 void
1352 1352 taskq_suspend(taskq_t *tq)
1353 1353 {
1354 1354 rw_enter(&tq->tq_threadlock, RW_WRITER);
1355 1355
1356 1356 if (tq->tq_flags & TASKQ_DYNAMIC) {
1357 1357 taskq_bucket_t *b = tq->tq_buckets;
1358 1358 int bid = 0;
1359 1359 for (; (b != NULL) && (bid < tq->tq_nbuckets); b++, bid++) {
1360 1360 mutex_enter(&b->tqbucket_lock);
1361 1361 b->tqbucket_flags |= TQBUCKET_SUSPEND;
1362 1362 mutex_exit(&b->tqbucket_lock);
1363 1363 }
1364 1364 }
1365 1365 /*
1366 1366 * Mark task queue as being suspended. Needed for taskq_suspended().
1367 1367 */
1368 1368 mutex_enter(&tq->tq_lock);
1369 1369 ASSERT(!(tq->tq_flags & TASKQ_SUSPENDED));
1370 1370 tq->tq_flags |= TASKQ_SUSPENDED;
1371 1371 mutex_exit(&tq->tq_lock);
1372 1372 }
1373 1373
1374 1374 /*
1375 1375 * returns: 1 if tq is suspended, 0 otherwise.
1376 1376 */
1377 1377 int
1378 1378 taskq_suspended(taskq_t *tq)
1379 1379 {
1380 1380 return ((tq->tq_flags & TASKQ_SUSPENDED) != 0);
1381 1381 }
1382 1382
1383 1383 /*
1384 1384 * Resume taskq execution.
1385 1385 */
1386 1386 void
1387 1387 taskq_resume(taskq_t *tq)
1388 1388 {
1389 1389 ASSERT(RW_WRITE_HELD(&tq->tq_threadlock));
1390 1390
1391 1391 if (tq->tq_flags & TASKQ_DYNAMIC) {
1392 1392 taskq_bucket_t *b = tq->tq_buckets;
1393 1393 int bid = 0;
1394 1394 for (; (b != NULL) && (bid < tq->tq_nbuckets); b++, bid++) {
1395 1395 mutex_enter(&b->tqbucket_lock);
1396 1396 b->tqbucket_flags &= ~TQBUCKET_SUSPEND;
1397 1397 mutex_exit(&b->tqbucket_lock);
1398 1398 }
1399 1399 }
1400 1400 mutex_enter(&tq->tq_lock);
1401 1401 ASSERT(tq->tq_flags & TASKQ_SUSPENDED);
1402 1402 tq->tq_flags &= ~TASKQ_SUSPENDED;
1403 1403 mutex_exit(&tq->tq_lock);
1404 1404
1405 1405 rw_exit(&tq->tq_threadlock);
1406 1406 }
1407 1407
1408 1408 int
1409 1409 taskq_member(taskq_t *tq, kthread_t *thread)
1410 1410 {
1411 1411 return (thread->t_taskq == tq);
1412 1412 }
1413 1413
1414 1414 /*
1415 1415 * Creates a thread in the taskq. We only allow one outstanding create at
1416 1416 * a time. We drop and reacquire the tq_lock in order to avoid blocking other
1417 1417 * taskq activity while thread_create() or lwp_kernel_create() run.
1418 1418 *
1419 1419 * The first time we're called, we do some additional setup, and do not
1420 1420 * return until there are enough threads to start servicing requests.
1421 1421 */
1422 1422 static void
1423 1423 taskq_thread_create(taskq_t *tq)
1424 1424 {
1425 1425 kthread_t *t;
1426 1426 const boolean_t first = (tq->tq_nthreads == 0);
1427 1427
1428 1428 ASSERT(MUTEX_HELD(&tq->tq_lock));
1429 1429 ASSERT(tq->tq_flags & TASKQ_CHANGING);
1430 1430 ASSERT(tq->tq_nthreads < tq->tq_nthreads_target);
1431 1431 ASSERT(!(tq->tq_flags & TASKQ_THREAD_CREATED));
1432 1432
1433 1433
1434 1434 tq->tq_flags |= TASKQ_THREAD_CREATED;
1435 1435 tq->tq_active++;
1436 1436 mutex_exit(&tq->tq_lock);
1437 1437
1438 1438 /*
1439 1439 * With TASKQ_DUTY_CYCLE the new thread must have an LWP
1440 1440 * as explained in ../disp/sysdc.c (for the msacct data).
1441 1441 * Otherwise simple kthreads are preferred.
1442 1442 */
1443 1443 if ((tq->tq_flags & TASKQ_DUTY_CYCLE) != 0) {
1444 1444 /* Enforced in taskq_create_common */
1445 1445 ASSERT3P(tq->tq_proc, !=, &p0);
1446 1446 t = lwp_kernel_create(tq->tq_proc, taskq_thread, tq, TS_RUN,
1447 1447 tq->tq_pri);
1448 1448 } else {
1449 1449 t = thread_create(NULL, 0, taskq_thread, tq, 0, tq->tq_proc,
1450 1450 TS_RUN, tq->tq_pri);
1451 1451 }
1452 1452
1453 1453 if (!first) {
1454 1454 mutex_enter(&tq->tq_lock);
1455 1455 return;
1456 1456 }
1457 1457
1458 1458 /*
1459 1459 * We know the thread cannot go away, since tq cannot be
1460 1460 * destroyed until creation has completed. We can therefore
1461 1461 * safely dereference t.
1462 1462 */
1463 1463 if (tq->tq_flags & TASKQ_THREADS_CPU_PCT) {
1464 1464 taskq_cpupct_install(tq, t->t_cpupart);
1465 1465 }
1466 1466 mutex_enter(&tq->tq_lock);
1467 1467
1468 1468 /* Wait until we can service requests. */
1469 1469 while (tq->tq_nthreads != tq->tq_nthreads_target &&
1470 1470 tq->tq_nthreads < TASKQ_CREATE_ACTIVE_THREADS) {
1471 1471 cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
1472 1472 }
1473 1473 }
1474 1474
1475 1475 /*
1476 1476  * Common "sleep taskq thread" function.  It handles CPR (checkpoint/resume)
1477 1477  * callbacks and gives debuggers a common point to find inactive threads.
1478 1478 */
1479 1479 static clock_t
1480 1480 taskq_thread_wait(taskq_t *tq, kmutex_t *mx, kcondvar_t *cv,
1481 1481 callb_cpr_t *cprinfo, clock_t timeout)
1482 1482 {
1483 1483 clock_t ret = 0;
1484 1484
1485 1485 if (!(tq->tq_flags & TASKQ_CPR_SAFE)) {
1486 1486 CALLB_CPR_SAFE_BEGIN(cprinfo);
1487 1487 }
1488 1488 if (timeout < 0)
1489 1489 cv_wait(cv, mx);
1490 1490 else
1491 1491 ret = cv_reltimedwait(cv, mx, timeout, TR_CLOCK_TICK);
1492 1492
1493 1493 if (!(tq->tq_flags & TASKQ_CPR_SAFE)) {
1494 1494 CALLB_CPR_SAFE_END(cprinfo, mx);
1495 1495 }
1496 1496
1497 1497 return (ret);
1498 1498 }
1499 1499
1500 1500 /*
1501 1501 * Worker thread for processing task queue.
1502 1502 */
1503 1503 static void
1504 1504 taskq_thread(void *arg)
1505 1505 {
1506 1506 int thread_id;
1507 1507
1508 1508 taskq_t *tq = arg;
1509 1509 taskq_ent_t *tqe;
1510 1510 callb_cpr_t cprinfo;
1511 1511 hrtime_t start, end;
1512 1512 boolean_t freeit;
1513 1513
1514 1514 curthread->t_taskq = tq; /* mark ourselves for taskq_member() */
1515 1515
1516 1516 if (curproc != &p0 && (tq->tq_flags & TASKQ_DUTY_CYCLE)) {
1517 1517 sysdc_thread_enter(curthread, tq->tq_DC,
1518 1518 (tq->tq_flags & TASKQ_DC_BATCH) ? SYSDC_THREAD_BATCH : 0);
1519 1519 }
1520 1520
1521 1521 if (tq->tq_flags & TASKQ_CPR_SAFE) {
1522 1522 CALLB_CPR_INIT_SAFE(curthread, tq->tq_name);
1523 1523 } else {
1524 1524 CALLB_CPR_INIT(&cprinfo, &tq->tq_lock, callb_generic_cpr,
1525 1525 tq->tq_name);
1526 1526 }
1527 1527 mutex_enter(&tq->tq_lock);
1528 1528 thread_id = ++tq->tq_nthreads;
1529 1529 ASSERT(tq->tq_flags & TASKQ_THREAD_CREATED);
1530 1530 ASSERT(tq->tq_flags & TASKQ_CHANGING);
1531 1531 tq->tq_flags &= ~TASKQ_THREAD_CREATED;
1532 1532
1533 1533 VERIFY3S(thread_id, <=, tq->tq_nthreads_max);
1534 1534
1535 1535 if (tq->tq_nthreads_max == 1)
1536 1536 tq->tq_thread = curthread;
1537 1537 else
1538 1538 tq->tq_threadlist[thread_id - 1] = curthread;
1539 1539
1540 1540 /* Allow taskq_create_common()'s taskq_thread_create() to return. */
1541 1541 if (tq->tq_nthreads == TASKQ_CREATE_ACTIVE_THREADS)
1542 1542 cv_broadcast(&tq->tq_wait_cv);
1543 1543
1544 1544 for (;;) {
1545 1545 if (tq->tq_flags & TASKQ_CHANGING) {
1546 1546 /* See if we're no longer needed */
1547 1547 if (thread_id > tq->tq_nthreads_target) {
1548 1548 /*
1549 1549 * To preserve the one-to-one mapping between
1550 1550 * thread_id and thread, we must exit from
1551 1551 * highest thread ID to least.
1552 1552 *
1553 1553 * However, if everyone is exiting, the order
1554 1554 * doesn't matter, so just exit immediately.
1555 1555 * (this is safe, since you must wait for
1556 1556 * nthreads to reach 0 after setting
1557 1557 * tq_nthreads_target to 0)
1558 1558 */
1559 1559 if (thread_id == tq->tq_nthreads ||
1560 1560 tq->tq_nthreads_target == 0)
1561 1561 break;
1562 1562
1563 1563 /* Wait for higher thread_ids to exit */
1564 1564 (void) taskq_thread_wait(tq, &tq->tq_lock,
1565 1565 &tq->tq_exit_cv, &cprinfo, -1);
1566 1566 continue;
1567 1567 }
1568 1568
1569 1569 /*
1570 1570 * If no thread is starting taskq_thread(), we can
1571 1571 * do some bookkeeping.
1572 1572 */
1573 1573 if (!(tq->tq_flags & TASKQ_THREAD_CREATED)) {
1574 1574 /* Check if we've reached our target */
1575 1575 if (tq->tq_nthreads == tq->tq_nthreads_target) {
1576 1576 tq->tq_flags &= ~TASKQ_CHANGING;
1577 1577 cv_broadcast(&tq->tq_wait_cv);
1578 1578 }
1579 1579 /* Check if we need to create a thread */
1580 1580 if (tq->tq_nthreads < tq->tq_nthreads_target) {
1581 1581 taskq_thread_create(tq);
1582 1582 continue; /* tq_lock was dropped */
1583 1583 }
1584 1584 }
1585 1585 }
1586 1586 if ((tqe = tq->tq_task.tqent_next) == &tq->tq_task) {
1587 1587 if (--tq->tq_active == 0)
1588 1588 cv_broadcast(&tq->tq_wait_cv);
1589 1589 (void) taskq_thread_wait(tq, &tq->tq_lock,
1590 1590 &tq->tq_dispatch_cv, &cprinfo, -1);
1591 1591 tq->tq_active++;
1592 1592 continue;
1593 1593 }
1594 1594
1595 1595 tqe->tqent_prev->tqent_next = tqe->tqent_next;
1596 1596 tqe->tqent_next->tqent_prev = tqe->tqent_prev;
1597 1597 mutex_exit(&tq->tq_lock);
1598 1598
1599 1599 /*
1600 1600 * For prealloc'd tasks, we don't free anything. We
1601 1601 * have to check this now, because once we call the
1602 1602 		 * function for a prealloc'd task, we can't touch the
1603 1603 		 * tqent any longer (calling the function returns
1604 1604 		 * ownership of the tqent back to the caller of
1605 1605 		 * taskq_dispatch).
1606 1606 */
1607 1607 if ((!(tq->tq_flags & TASKQ_DYNAMIC)) &&
1608 1608 (tqe->tqent_un.tqent_flags & TQENT_FLAG_PREALLOC)) {
1609 1609 /* clear pointers to assist assertion checks */
1610 1610 tqe->tqent_next = tqe->tqent_prev = NULL;
1611 1611 freeit = B_FALSE;
1612 1612 } else {
1613 1613 freeit = B_TRUE;
1614 1614 }
1615 1615
1616 1616 rw_enter(&tq->tq_threadlock, RW_READER);
1617 1617 start = gethrtime();
1618 1618 DTRACE_PROBE2(taskq__exec__start, taskq_t *, tq,
1619 1619 taskq_ent_t *, tqe);
1620 1620 tqe->tqent_func(tqe->tqent_arg);
1621 1621 DTRACE_PROBE2(taskq__exec__end, taskq_t *, tq,
1622 1622 taskq_ent_t *, tqe);
1623 1623 end = gethrtime();
1624 1624 rw_exit(&tq->tq_threadlock);
1625 1625
1626 1626 mutex_enter(&tq->tq_lock);
1627 1627 tq->tq_totaltime += end - start;
1628 1628 tq->tq_executed++;
1629 1629
1630 1630 if (freeit)
1631 1631 taskq_ent_free(tq, tqe);
1632 1632 }
1633 1633
1634 1634 if (tq->tq_nthreads_max == 1)
1635 1635 tq->tq_thread = NULL;
1636 1636 else
1637 1637 tq->tq_threadlist[thread_id - 1] = NULL;
1638 1638
1639 1639 /* We're exiting, and therefore no longer active */
1640 1640 ASSERT(tq->tq_active > 0);
1641 1641 tq->tq_active--;
1642 1642
1643 1643 ASSERT(tq->tq_nthreads > 0);
1644 1644 tq->tq_nthreads--;
1645 1645
1646 1646 /* Wake up anyone waiting for us to exit */
1647 1647 cv_broadcast(&tq->tq_exit_cv);
1648 1648 if (tq->tq_nthreads == tq->tq_nthreads_target) {
1649 1649 if (!(tq->tq_flags & TASKQ_THREAD_CREATED))
1650 1650 tq->tq_flags &= ~TASKQ_CHANGING;
1651 1651
1652 1652 cv_broadcast(&tq->tq_wait_cv);
1653 1653 }
1654 1654
1655 1655 ASSERT(!(tq->tq_flags & TASKQ_CPR_SAFE));
1656 1656 CALLB_CPR_EXIT(&cprinfo); /* drops tq->tq_lock */
1657 1657 if (curthread->t_lwp != NULL) {
1658 1658 mutex_enter(&curproc->p_lock);
1659 1659 lwp_exit();
1660 1660 } else {
1661 1661 thread_exit();
1662 1662 }
1663 1663 }
1664 1664
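The TQENT_FLAG_PREALLOC path handled in taskq_thread() above corresponds to entries supplied by the caller rather than allocated from taskq_ent_cache. A minimal sketch of such a caller, assuming a hypothetical my_job_t structure and my_job_func() callback dispatched through taskq_dispatch_ent():

	typedef struct my_job {
		taskq_ent_t	mj_tqent;	/* embedded, prealloc'd entry */
		int		mj_data;
	} my_job_t;

	static void my_job_func(void *);

	static void
	my_dispatch(taskq_t *tq, my_job_t *job)
	{
		/*
		 * job came from kmem_zalloc(), so mj_tqent starts out clear.
		 * Once dispatched, the entry belongs to the taskq until
		 * my_job_func() has been called.
		 */
		taskq_dispatch_ent(tq, my_job_func, job, TQ_SLEEP,
		    &job->mj_tqent);
	}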
1665 1665 /*
1666 1666 * Worker per-entry thread for dynamic dispatches.
1667 1667 */
1668 1668 static void
1669 1669 taskq_d_thread(taskq_ent_t *tqe)
1670 1670 {
1671 1671 taskq_bucket_t *bucket = tqe->tqent_un.tqent_bucket;
1672 1672 taskq_t *tq = bucket->tqbucket_taskq;
1673 1673 kmutex_t *lock = &bucket->tqbucket_lock;
1674 1674 kcondvar_t *cv = &tqe->tqent_cv;
1675 1675 callb_cpr_t cprinfo;
1676 1676 clock_t w;
1677 1677
1678 1678 CALLB_CPR_INIT(&cprinfo, lock, callb_generic_cpr, tq->tq_name);
1679 1679
1680 1680 mutex_enter(lock);
1681 1681
1682 1682 for (;;) {
1683 1683 /*
1684 1684 * If a task is scheduled (func != NULL), execute it, otherwise
1685 1685 * sleep, waiting for a job.
1686 1686 */
1687 1687 if (tqe->tqent_func != NULL) {
1688 1688 hrtime_t start;
1689 1689 hrtime_t end;
1690 1690
1691 1691 ASSERT(bucket->tqbucket_nalloc > 0);
1692 1692
1693 1693 /*
1694 1694 * It is possible to free the entry right away before
1695 1695 * actually executing the task so that subsequent
1696 1696 			 * dispatches may immediately reuse it.  But this
1697 1697 			 * effectively creates a queue of depth two in the entry
1698 1698 * and may lead to a deadlock if the execution of the
1699 1699 * current task depends on the execution of the next
1700 1700 * scheduled task. So, we keep the entry busy until the
1701 1701 * task is processed.
1702 1702 */
1703 1703
1704 1704 mutex_exit(lock);
1705 1705 start = gethrtime();
1706 1706 DTRACE_PROBE3(taskq__d__exec__start, taskq_t *, tq,
1707 1707 taskq_bucket_t *, bucket, taskq_ent_t *, tqe);
1708 1708 tqe->tqent_func(tqe->tqent_arg);
1709 1709 DTRACE_PROBE3(taskq__d__exec__end, taskq_t *, tq,
1710 1710 taskq_bucket_t *, bucket, taskq_ent_t *, tqe);
1711 1711 end = gethrtime();
1712 1712 mutex_enter(lock);
1713 1713 bucket->tqbucket_totaltime += end - start;
1714 1714
1715 1715 /*
1716 1716 * Return the entry to the bucket free list.
1717 1717 */
1718 1718 tqe->tqent_func = NULL;
1719 1719 TQ_APPEND(bucket->tqbucket_freelist, tqe);
1720 1720 bucket->tqbucket_nalloc--;
1721 1721 bucket->tqbucket_nfree++;
1722 1722 ASSERT(!IS_EMPTY(bucket->tqbucket_freelist));
1723 1723 /*
1724 1724 * taskq_wait() waits for nalloc to drop to zero on
1725 1725 * tqbucket_cv.
1726 1726 */
1727 1727 cv_signal(&bucket->tqbucket_cv);
1728 1728 }
1729 1729
1730 1730 /*
1731 1731 * At this point the entry must be in the bucket free list -
1732 1732 * either because it was there initially or because it just
1733 1733 * finished executing a task and put itself on the free list.
1734 1734 */
1735 1735 ASSERT(bucket->tqbucket_nfree > 0);
1736 1736 /*
1737 1737 * Go to sleep unless we are closing.
1738 1738 * If a thread is sleeping too long, it dies.
1739 1739 */
1740 1740 if (! (bucket->tqbucket_flags & TQBUCKET_CLOSE)) {
1741 1741 w = taskq_thread_wait(tq, lock, cv,
1742 1742 &cprinfo, taskq_thread_timeout * hz);
1743 1743 }
1744 1744
1745 1745 /*
1746 1746 * At this point we may be in two different states:
1747 1747 *
1748 1748 * (1) tqent_func is set which means that a new task is
1749 1749 * dispatched and we need to execute it.
1750 1750 *
1751 1751 * (2) Thread is sleeping for too long or we are closing. In
1752 1752 * both cases destroy the thread and the entry.
1753 1753 */
1754 1754
1755 1755 /* If func is NULL we should be on the freelist. */
1756 1756 ASSERT((tqe->tqent_func != NULL) ||
1757 1757 (bucket->tqbucket_nfree > 0));
1758 1758 /* If func is non-NULL we should be allocated */
1759 1759 ASSERT((tqe->tqent_func == NULL) ||
1760 1760 (bucket->tqbucket_nalloc > 0));
1761 1761
1762 1762 /* Check freelist consistency */
1763 1763 ASSERT((bucket->tqbucket_nfree > 0) ||
1764 1764 IS_EMPTY(bucket->tqbucket_freelist));
1765 1765 ASSERT((bucket->tqbucket_nfree == 0) ||
1766 1766 !IS_EMPTY(bucket->tqbucket_freelist));
1767 1767
1768 1768 if ((tqe->tqent_func == NULL) &&
1769 1769 ((w == -1) || (bucket->tqbucket_flags & TQBUCKET_CLOSE))) {
1770 1770 /*
1771 1771 * This thread is sleeping for too long or we are
1772 1772 * closing - time to die.
1773 1773 * Thread creation/destruction happens rarely,
1774 1774 * so grabbing the lock is not a big performance issue.
1775 1775 * The bucket lock is dropped by CALLB_CPR_EXIT().
1776 1776 */
1777 1777
1778 1778 /* Remove the entry from the free list. */
1779 1779 tqe->tqent_prev->tqent_next = tqe->tqent_next;
1780 1780 tqe->tqent_next->tqent_prev = tqe->tqent_prev;
1781 1781 ASSERT(bucket->tqbucket_nfree > 0);
1782 1782 bucket->tqbucket_nfree--;
1783 1783
1784 1784 TQ_STAT(bucket, tqs_tdeaths);
1785 1785 cv_signal(&bucket->tqbucket_cv);
1786 1786 tqe->tqent_thread = NULL;
1787 1787 mutex_enter(&tq->tq_lock);
1788 1788 tq->tq_tdeaths++;
1789 1789 mutex_exit(&tq->tq_lock);
1790 1790 CALLB_CPR_EXIT(&cprinfo);
1791 1791 kmem_cache_free(taskq_ent_cache, tqe);
1792 1792 thread_exit();
1793 1793 }
1794 1794 }
1795 1795 }
1796 1796
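Putting the dynamic machinery above together from a consumer's point of view: a minimal sketch, assuming hypothetical my_task_func()/my_arg, of a TASKQ_DYNAMIC queue in which each dispatch normally gets a per-entry bucket thread and falls back to the single backing thread when buckets are exhausted:

	taskq_t *tq;

	tq = taskq_create("my_dyn_tq", 64, minclsyspri, 1, INT_MAX,
	    TASKQ_DYNAMIC | TASKQ_PREPOPULATE);

	/* With TQ_NOSLEEP the dispatch may fail when resources are short. */
	if (taskq_dispatch(tq, my_task_func, my_arg, TQ_NOSLEEP) == 0) {
		/* out of memory or entries; retry later or use TQ_SLEEP */
	}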
1797 1797
1798 1798 /*
1799 1799 * Taskq creation. May sleep for memory.
1800 1800 * Always use automatically generated instances to avoid kstat name space
1801 1801 * collisions.
1802 1802 */
1803 1803
1804 1804 taskq_t *
1805 1805 taskq_create(const char *name, int nthreads, pri_t pri, int minalloc,
1806 1806 int maxalloc, uint_t flags)
1807 1807 {
1808 1808 ASSERT((flags & ~TASKQ_INTERFACE_FLAGS) == 0);
1809 1809
1810 1810 return (taskq_create_common(name, 0, nthreads, pri, minalloc,
1811 1811 maxalloc, &p0, 0, flags | TASKQ_NOINSTANCE));
1812 1812 }
1813 1813
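A minimal lifecycle sketch for the fixed-thread case above, assuming hypothetical my_task_func()/my_arg; with TQ_SLEEP a dispatch to a non-dynamic queue cannot fail:

	taskq_t *tq;

	tq = taskq_create("my_taskq", 4, minclsyspri, 4, INT_MAX,
	    TASKQ_PREPOPULATE);
	(void) taskq_dispatch(tq, my_task_func, my_arg, TQ_SLEEP);
	taskq_wait(tq);			/* drain everything dispatched */
	taskq_destroy(tq);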
1814 1814 /*
1815 1815 * Create an instance of task queue. It is legal to create task queues with the
1816 1816 * same name and different instances.
1817 1817 *
1818 1818 * taskq_create_instance is used by ddi_taskq_create() where it gets the
1819 1819 * instance from ddi_get_instance(). In some cases the instance is not
1820 1820 * initialized and is set to -1. This case is handled as if no instance was
1821 1821 * passed at all.
1822 1822 */
1823 1823 taskq_t *
1824 1824 taskq_create_instance(const char *name, int instance, int nthreads, pri_t pri,
1825 1825 int minalloc, int maxalloc, uint_t flags)
1826 1826 {
1827 1827 ASSERT((flags & ~TASKQ_INTERFACE_FLAGS) == 0);
1828 1828 ASSERT((instance >= 0) || (instance == -1));
1829 1829
1830 1830 if (instance < 0) {
1831 1831 flags |= TASKQ_NOINSTANCE;
1832 1832 }
1833 1833
1834 1834 return (taskq_create_common(name, instance, nthreads,
1835 1835 pri, minalloc, maxalloc, &p0, 0, flags));
1836 1836 }
1837 1837
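A sketch of the per-instance variant as it is typically reached from a driver attach path, assuming a hypothetical soft-state pointer "sc" and dev_info_t "dip":

	int inst = ddi_get_instance(dip);

	/* One worker thread, up to 8 cached entries, no special flags. */
	sc->sc_tq = taskq_create_instance("mydrv_tq", inst, 1, minclsyspri,
	    1, 8, 0);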
1838 1838 taskq_t *
1839 1839 taskq_create_proc(const char *name, int nthreads, pri_t pri, int minalloc,
1840 1840 int maxalloc, proc_t *proc, uint_t flags)
1841 1841 {
1842 1842 ASSERT((flags & ~TASKQ_INTERFACE_FLAGS) == 0);
1843 1843 ASSERT(proc->p_flag & SSYS);
1844 1844
1845 1845 return (taskq_create_common(name, 0, nthreads, pri, minalloc,
1846 1846 maxalloc, proc, 0, flags | TASKQ_NOINSTANCE));
1847 1847 }
1848 1848
1849 1849 taskq_t *
1850 1850 taskq_create_sysdc(const char *name, int nthreads, int minalloc,
1851 1851 int maxalloc, proc_t *proc, uint_t dc, uint_t flags)
1852 1852 {
1853 1853 ASSERT((flags & ~TASKQ_INTERFACE_FLAGS) == 0);
1854 1854 ASSERT(proc->p_flag & SSYS);
1855 1855
1856 1856 return (taskq_create_common(name, 0, nthreads, minclsyspri, minalloc,
1857 1857 maxalloc, proc, dc, flags | TASKQ_NOINSTANCE | TASKQ_DUTY_CYCLE));
1858 1858 }
1859 1859
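A sketch of the duty-cycle variant, assuming a hypothetical system process pointer "my_sysproc" (its p_flag must include SSYS, and it cannot be &p0); the dc argument is the duty cycle handed to sysdc_thread_enter() for each worker thread:

	taskq_t *tq;

	tq = taskq_create_sysdc("my_dc_tq", 8, 8, INT_MAX, my_sysproc,
	    20, TASKQ_PREPOPULATE);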
1860 1860 static taskq_t *
1861 1861 taskq_create_common(const char *name, int instance, int nthreads, pri_t pri,
1862 1862 int minalloc, int maxalloc, proc_t *proc, uint_t dc, uint_t flags)
1863 1863 {
1864 1864 taskq_t *tq = kmem_cache_alloc(taskq_cache, KM_SLEEP);
1865 1865 uint_t ncpus = ((boot_max_ncpus == -1) ? max_ncpus : boot_max_ncpus);
1866 1866 uint_t bsize; /* # of buckets - always power of 2 */
1867 1867 int max_nthreads;
1868 1868
1869 1869 /*
1870 1870 * TASKQ_DYNAMIC, TASKQ_CPR_SAFE and TASKQ_THREADS_CPU_PCT are all
1871 1871 * mutually incompatible.
1872 1872 */
1873 1873 IMPLY((flags & TASKQ_DYNAMIC), !(flags & TASKQ_CPR_SAFE));
1874 1874 IMPLY((flags & TASKQ_DYNAMIC), !(flags & TASKQ_THREADS_CPU_PCT));
1875 1875 IMPLY((flags & TASKQ_CPR_SAFE), !(flags & TASKQ_THREADS_CPU_PCT));
1876 1876
1877 1877 /* Cannot have DYNAMIC with DUTY_CYCLE */
1878 1878 IMPLY((flags & TASKQ_DYNAMIC), !(flags & TASKQ_DUTY_CYCLE));
1879 1879
1880 1880 /* Cannot have DUTY_CYCLE with a p0 kernel process */
1881 1881 IMPLY((flags & TASKQ_DUTY_CYCLE), proc != &p0);
1882 1882
1883 1883 /* Cannot have DC_BATCH without DUTY_CYCLE */
1884 1884 ASSERT((flags & (TASKQ_DUTY_CYCLE|TASKQ_DC_BATCH)) != TASKQ_DC_BATCH);
1885 1885
1886 1886 ASSERT(proc != NULL);
1887 1887
1888 1888 bsize = 1 << (highbit(ncpus) - 1);
1889 1889 ASSERT(bsize >= 1);
1890 1890 bsize = MIN(bsize, taskq_maxbuckets);
1891 1891
1892 1892 if (flags & TASKQ_DYNAMIC) {
1893 1893 ASSERT3S(nthreads, >=, 1);
1894 1894 tq->tq_maxsize = nthreads;
1895 1895
1896 1896 		/* For dynamic task queues use just one backing thread */
1897 1897 nthreads = max_nthreads = 1;
1898 1898
1899 1899 } else if (flags & TASKQ_THREADS_CPU_PCT) {
1900 1900 uint_t pct;
1901 1901 ASSERT3S(nthreads, >=, 0);
1902 1902 pct = nthreads;
1903 1903
1904 1904 if (pct > taskq_cpupct_max_percent)
1905 1905 pct = taskq_cpupct_max_percent;
1906 1906
1907 1907 /*
1908 1908 * If you're using THREADS_CPU_PCT, the process for the
1909 1909 * taskq threads must be curproc. This allows any pset
1910 1910 * binding to be inherited correctly. If proc is &p0,
1911 1911 * we won't be creating LWPs, so new threads will be assigned
1912 1912 * to the default processor set.
1913 1913 */
1914 1914 ASSERT(curproc == proc || proc == &p0);
1915 1915 tq->tq_threads_ncpus_pct = pct;
1916 1916 nthreads = 1; /* corrected in taskq_thread_create() */
1917 1917 max_nthreads = TASKQ_THREADS_PCT(max_ncpus, pct);
1918 1918
1919 1919 } else {
1920 1920 ASSERT3S(nthreads, >=, 1);
1921 1921 max_nthreads = nthreads;
1922 1922 }
1923 1923
1924 1924 if (max_nthreads < taskq_minimum_nthreads_max)
1925 1925 max_nthreads = taskq_minimum_nthreads_max;
1926 1926
1927 1927 /*
1928 1928 * Make sure the name is 0-terminated, and conforms to the rules for
1929 1929  * C identifiers.
1930 1930 */
1931 1931 (void) strncpy(tq->tq_name, name, TASKQ_NAMELEN + 1);
1932 1932 strident_canon(tq->tq_name, TASKQ_NAMELEN + 1);
1933 1933
1934 1934 tq->tq_flags = flags | TASKQ_CHANGING;
1935 1935 tq->tq_active = 0;
1936 1936 tq->tq_instance = instance;
1937 1937 tq->tq_nthreads_target = nthreads;
1938 1938 tq->tq_nthreads_max = max_nthreads;
1939 1939 tq->tq_minalloc = minalloc;
1940 1940 tq->tq_maxalloc = maxalloc;
1941 1941 tq->tq_nbuckets = bsize;
1942 1942 tq->tq_proc = proc;
1943 1943 tq->tq_pri = pri;
1944 1944 tq->tq_DC = dc;
1945 1945 list_link_init(&tq->tq_cpupct_link);
1946 1946
1947 1947 if (max_nthreads > 1)
1948 1948 tq->tq_threadlist = kmem_alloc(
1949 1949 sizeof (kthread_t *) * max_nthreads, KM_SLEEP);
1950 1950
1951 1951 mutex_enter(&tq->tq_lock);
1952 1952 if (flags & TASKQ_PREPOPULATE) {
1953 1953 while (minalloc-- > 0)
1954 1954 taskq_ent_free(tq, taskq_ent_alloc(tq, TQ_SLEEP));
1955 1955 }
1956 1956
1957 1957 /*
1958 1958 * Before we start creating threads for this taskq, take a
1959 1959 * zone hold so the zone can't go away before taskq_destroy
1960 1960 * makes sure all the taskq threads are gone. This hold is
1961 1961 * similar in purpose to those taken by zthread_create().
1962 1962 */
1963 1963 zone_hold(tq->tq_proc->p_zone);
1964 1964
1965 1965 /*
1966 1966 * Create the first thread, which will create any other threads
1967 1967 * necessary. taskq_thread_create will not return until we have
1968 1968 * enough threads to be able to process requests.
1969 1969 */
1970 1970 taskq_thread_create(tq);
1971 1971 mutex_exit(&tq->tq_lock);
1972 1972
1973 1973 if (flags & TASKQ_DYNAMIC) {
1974 1974 taskq_bucket_t *bucket = kmem_zalloc(sizeof (taskq_bucket_t) *
1975 1975 bsize, KM_SLEEP);
1976 1976 int b_id;
1977 1977
1978 1978 tq->tq_buckets = bucket;
1979 1979
1980 1980 /* Initialize each bucket */
1981 1981 for (b_id = 0; b_id < bsize; b_id++, bucket++) {
1982 1982 mutex_init(&bucket->tqbucket_lock, NULL, MUTEX_DEFAULT,
1983 1983 NULL);
1984 1984 cv_init(&bucket->tqbucket_cv, NULL, CV_DEFAULT, NULL);
1985 1985 bucket->tqbucket_taskq = tq;
1986 1986 bucket->tqbucket_freelist.tqent_next =
1987 1987 bucket->tqbucket_freelist.tqent_prev =
1988 1988 &bucket->tqbucket_freelist;
1989 1989 if (flags & TASKQ_PREPOPULATE)
1990 1990 taskq_bucket_extend(bucket);
1991 1991 }
1992 1992 }
1993 1993
1994 1994 /*
1995 1995 * Install kstats.
1996 1996 * We have two cases:
1997 1997 * 1) Instance is provided to taskq_create_instance(). In this case it
1998 1998 * should be >= 0 and we use it.
1999 1999 *
2000 2000  * 2) Instance is not provided and is automatically generated below.
2001 2001 */
2002 2002 if (flags & TASKQ_NOINSTANCE) {
2003 2003 instance = tq->tq_instance =
2004 2004 (int)(uintptr_t)vmem_alloc(taskq_id_arena, 1, VM_SLEEP);
2005 2005 }
2006 2006
2007 2007 if (flags & TASKQ_DYNAMIC) {
2008 2008 if ((tq->tq_kstat = kstat_create("unix", instance,
2009 2009 tq->tq_name, "taskq_d", KSTAT_TYPE_NAMED,
2010 2010 sizeof (taskq_d_kstat) / sizeof (kstat_named_t),
2011 2011 KSTAT_FLAG_VIRTUAL)) != NULL) {
2012 2012 tq->tq_kstat->ks_lock = &taskq_d_kstat_lock;
2013 2013 tq->tq_kstat->ks_data = &taskq_d_kstat;
2014 2014 tq->tq_kstat->ks_update = taskq_d_kstat_update;
2015 2015 tq->tq_kstat->ks_private = tq;
2016 2016 kstat_install(tq->tq_kstat);
2017 2017 }
2018 2018 } else {
2019 2019 if ((tq->tq_kstat = kstat_create("unix", instance, tq->tq_name,
2020 2020 "taskq", KSTAT_TYPE_NAMED,
2021 2021 sizeof (taskq_kstat) / sizeof (kstat_named_t),
2022 2022 KSTAT_FLAG_VIRTUAL)) != NULL) {
2023 2023 tq->tq_kstat->ks_lock = &taskq_kstat_lock;
2024 2024 tq->tq_kstat->ks_data = &taskq_kstat;
2025 2025 tq->tq_kstat->ks_update = taskq_kstat_update;
2026 2026 tq->tq_kstat->ks_private = tq;
2027 2027 kstat_install(tq->tq_kstat);
2028 2028 }
2029 2029 }
2030 2030
2031 2031 return (tq);
2032 2032 }
2033 2033
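With TASKQ_THREADS_CPU_PCT the nthreads argument to taskq_create() is interpreted as a percentage of the online CPUs, as handled in taskq_create_common() above. A minimal sketch (names hypothetical):

	/*
	 * Roughly half the CPUs' worth of threads; the target is
	 * re-adjusted as CPUs come online and offline.
	 */
	tq = taskq_create("my_pct_tq", 50, minclsyspri, 1, INT_MAX,
	    TASKQ_THREADS_CPU_PCT);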
2034 2034 /*
2035 2035 * taskq_destroy().
2036 2036 *
2037 2037 * Assumes: by the time taskq_destroy is called no one will use this task queue
2038 2038  * in any way and no one will try to dispatch entries to it.
2039 2039 */
2040 2040 void
2041 2041 taskq_destroy(taskq_t *tq)
2042 2042 {
2043 2043 taskq_bucket_t *b = tq->tq_buckets;
2044 2044 int bid = 0;
2045 2045
2046 2046 ASSERT(! (tq->tq_flags & TASKQ_CPR_SAFE));
2047 2047
2048 2048 /*
2049 2049 * Destroy kstats.
2050 2050 */
2051 2051 if (tq->tq_kstat != NULL) {
2052 2052 kstat_delete(tq->tq_kstat);
2053 2053 tq->tq_kstat = NULL;
2054 2054 }
2055 2055
2056 2056 /*
2057 2057 * Destroy instance if needed.
2058 2058 */
2059 2059 if (tq->tq_flags & TASKQ_NOINSTANCE) {
2060 2060 vmem_free(taskq_id_arena, (void *)(uintptr_t)(tq->tq_instance),
2061 2061 1);
2062 2062 tq->tq_instance = 0;
2063 2063 }
2064 2064
2065 2065 /*
2066 2066 * Unregister from the cpupct list.
2067 2067 */
2068 2068 if (tq->tq_flags & TASKQ_THREADS_CPU_PCT) {
2069 2069 taskq_cpupct_remove(tq);
2070 2070 }
2071 2071
2072 2072 /*
2073 2073 * Wait for any pending entries to complete.
2074 2074 */
2075 2075 taskq_wait(tq);
2076 2076
2077 2077 mutex_enter(&tq->tq_lock);
2078 2078 ASSERT((tq->tq_task.tqent_next == &tq->tq_task) &&
2079 2079 (tq->tq_active == 0));
2080 2080
2081 2081 /* notify all the threads that they need to exit */
2082 2082 tq->tq_nthreads_target = 0;
2083 2083
2084 2084 tq->tq_flags |= TASKQ_CHANGING;
2085 2085 cv_broadcast(&tq->tq_dispatch_cv);
2086 2086 cv_broadcast(&tq->tq_exit_cv);
2087 2087
2088 2088 while (tq->tq_nthreads != 0)
2089 2089 cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
2090 2090
2091 2091 if (tq->tq_nthreads_max != 1)
2092 2092 kmem_free(tq->tq_threadlist, sizeof (kthread_t *) *
2093 2093 tq->tq_nthreads_max);
2094 2094
2095 2095 tq->tq_minalloc = 0;
2096 2096 while (tq->tq_nalloc != 0)
2097 2097 taskq_ent_free(tq, taskq_ent_alloc(tq, TQ_SLEEP));
2098 2098
2099 2099 mutex_exit(&tq->tq_lock);
2100 2100
2101 2101 /*
2102 2102 * Mark each bucket as closing and wakeup all sleeping threads.
2103 2103 */
2104 2104 for (; (b != NULL) && (bid < tq->tq_nbuckets); b++, bid++) {
2105 2105 taskq_ent_t *tqe;
2106 2106
2107 2107 mutex_enter(&b->tqbucket_lock);
2108 2108
2109 2109 b->tqbucket_flags |= TQBUCKET_CLOSE;
2110 2110 /* Wakeup all sleeping threads */
2111 2111
2112 2112 for (tqe = b->tqbucket_freelist.tqent_next;
2113 2113 tqe != &b->tqbucket_freelist; tqe = tqe->tqent_next)
2114 2114 cv_signal(&tqe->tqent_cv);
2115 2115
2116 2116 ASSERT(b->tqbucket_nalloc == 0);
2117 2117
2118 2118 /*
2119 2119 		 * At this point we have waited for all pending jobs to complete
2120 2120 		 * (in both the task queue and the bucket) and no new jobs should
2121 2121 		 * arrive. Wait for all threads to die.
2122 2122 */
2123 2123 while (b->tqbucket_nfree > 0)
2124 2124 cv_wait(&b->tqbucket_cv, &b->tqbucket_lock);
2125 2125 mutex_exit(&b->tqbucket_lock);
2126 2126 mutex_destroy(&b->tqbucket_lock);
2127 2127 cv_destroy(&b->tqbucket_cv);
2128 2128 }
2129 2129
2130 2130 if (tq->tq_buckets != NULL) {
2131 2131 ASSERT(tq->tq_flags & TASKQ_DYNAMIC);
2132 2132 kmem_free(tq->tq_buckets,
2133 2133 sizeof (taskq_bucket_t) * tq->tq_nbuckets);
2134 2134
2135 2135 /* Cleanup fields before returning tq to the cache */
2136 2136 tq->tq_buckets = NULL;
2137 2137 tq->tq_tcreates = 0;
2138 2138 tq->tq_tdeaths = 0;
2139 2139 } else {
2140 2140 ASSERT(!(tq->tq_flags & TASKQ_DYNAMIC));
2141 2141 }
2142 2142
2143 2143 /*
2144 2144 * Now that all the taskq threads are gone, we can
2145 2145 * drop the zone hold taken in taskq_create_common
2146 2146 */
2147 2147 zone_rele(tq->tq_proc->p_zone);
2148 2148
2149 2149 tq->tq_threads_ncpus_pct = 0;
2150 2150 tq->tq_totaltime = 0;
2151 2151 tq->tq_tasks = 0;
2152 2152 tq->tq_maxtasks = 0;
2153 2153 tq->tq_executed = 0;
2154 2154 kmem_cache_free(taskq_cache, tq);
2155 2155 }
2156 2156
2157 2157 /*
2158 2158 * Extend a bucket with a new entry on the free list and attach a worker thread
2159 2159 * to it.
2160 2160 *
2161 2161 * Argument: pointer to the bucket.
2162 2162 *
2163 2163 * This function may quietly fail. It is only used by taskq_dispatch() which
2164 2164 * handles such failures properly.
2165 2165 */
2166 2166 static void
2167 2167 taskq_bucket_extend(void *arg)
2168 2168 {
2169 2169 taskq_ent_t *tqe;
2170 2170 taskq_bucket_t *b = (taskq_bucket_t *)arg;
2171 2171 taskq_t *tq = b->tqbucket_taskq;
2172 2172 int nthreads;
2173 2173
2174 2174 if (! ENOUGH_MEMORY()) {
2175 2175 TQ_STAT(b, tqs_nomem);
2176 2176 return;
2177 2177 }
2178 2178
2179 2179 mutex_enter(&tq->tq_lock);
2180 2180
2181 2181 /*
2182 2182 	 * Observe the per-taskq limit (tq_maxsize) on the number of threads.
2183 2183 */
2184 2184 if (tq->tq_tcreates++ - tq->tq_tdeaths > tq->tq_maxsize) {
2185 2185 tq->tq_tcreates--;
2186 2186 mutex_exit(&tq->tq_lock);
2187 2187 return;
2188 2188 }
2189 2189 mutex_exit(&tq->tq_lock);
2190 2190
2191 2191 tqe = kmem_cache_alloc(taskq_ent_cache, KM_NOSLEEP);
2192 2192
2193 2193 if (tqe == NULL) {
2194 2194 mutex_enter(&tq->tq_lock);
2195 2195 TQ_STAT(b, tqs_nomem);
2196 2196 tq->tq_tcreates--;
2197 2197 mutex_exit(&tq->tq_lock);
2198 2198 return;
2199 2199 }
2200 2200
2201 2201 ASSERT(tqe->tqent_thread == NULL);
2202 2202
2203 2203 tqe->tqent_un.tqent_bucket = b;
2204 2204
2205 2205 /*
2206 2206 * Create a thread in a TS_STOPPED state first. If it is successfully
2207 2207 * created, place the entry on the free list and start the thread.
2208 2208 */
2209 2209 tqe->tqent_thread = thread_create(NULL, 0, taskq_d_thread, tqe,
2210 2210 0, tq->tq_proc, TS_STOPPED, tq->tq_pri);
2211 2211
2212 2212 /*
2213 2213 	 * Once the entry is ready, link it to the bucket free list.
2214 2214 */
2215 2215 mutex_enter(&b->tqbucket_lock);
2216 2216 tqe->tqent_func = NULL;
2217 2217 TQ_APPEND(b->tqbucket_freelist, tqe);
2218 2218 b->tqbucket_nfree++;
2219 2219 TQ_STAT(b, tqs_tcreates);
2220 2220
2221 2221 #if TASKQ_STATISTIC
2222 2222 nthreads = b->tqbucket_stat.tqs_tcreates -
2223 2223 b->tqbucket_stat.tqs_tdeaths;
2224 2224 b->tqbucket_stat.tqs_maxthreads = MAX(nthreads,
2225 2225 b->tqbucket_stat.tqs_maxthreads);
2226 2226 #endif
2227 2227
2228 2228 mutex_exit(&b->tqbucket_lock);
2229 2229 /*
2230 2230 * Start the stopped thread.
2231 2231 */
2232 2232 thread_lock(tqe->tqent_thread);
2233 2233 tqe->tqent_thread->t_taskq = tq;
2234 2234 tqe->tqent_thread->t_schedflag |= TS_ALLSTART;
2235 2235 setrun_locked(tqe->tqent_thread);
2236 2236 thread_unlock(tqe->tqent_thread);
2237 2237 }
2238 2238
2239 2239 static int
2240 2240 taskq_kstat_update(kstat_t *ksp, int rw)
2241 2241 {
2242 2242 struct taskq_kstat *tqsp = &taskq_kstat;
2243 2243 taskq_t *tq = ksp->ks_private;
2244 2244
2245 2245 if (rw == KSTAT_WRITE)
2246 2246 return (EACCES);
2247 2247
2248 2248 tqsp->tq_pid.value.ui64 = tq->tq_proc->p_pid;
2249 2249 tqsp->tq_tasks.value.ui64 = tq->tq_tasks;
2250 2250 tqsp->tq_executed.value.ui64 = tq->tq_executed;
2251 2251 tqsp->tq_maxtasks.value.ui64 = tq->tq_maxtasks;
2252 2252 tqsp->tq_totaltime.value.ui64 = tq->tq_totaltime;
2253 2253 tqsp->tq_nactive.value.ui64 = tq->tq_active;
2254 2254 tqsp->tq_nalloc.value.ui64 = tq->tq_nalloc;
2255 2255 tqsp->tq_pri.value.ui64 = tq->tq_pri;
2256 2256 tqsp->tq_nthreads.value.ui64 = tq->tq_nthreads;
2257 2257 return (0);
2258 2258 }
2259 2259
2260 2260 static int
2261 2261 taskq_d_kstat_update(kstat_t *ksp, int rw)
2262 2262 {
2263 2263 struct taskq_d_kstat *tqsp = &taskq_d_kstat;
2264 2264 taskq_t *tq = ksp->ks_private;
2265 2265 taskq_bucket_t *b = tq->tq_buckets;
2266 2266 int bid = 0;
2267 2267
2268 2268 if (rw == KSTAT_WRITE)
2269 2269 return (EACCES);
2270 2270
2271 2271 ASSERT(tq->tq_flags & TASKQ_DYNAMIC);
2272 2272
2273 2273 tqsp->tqd_btasks.value.ui64 = tq->tq_tasks;
2274 2274 tqsp->tqd_bexecuted.value.ui64 = tq->tq_executed;
2275 2275 tqsp->tqd_bmaxtasks.value.ui64 = tq->tq_maxtasks;
2276 2276 tqsp->tqd_bnalloc.value.ui64 = tq->tq_nalloc;
2277 2277 tqsp->tqd_bnactive.value.ui64 = tq->tq_active;
2278 2278 tqsp->tqd_btotaltime.value.ui64 = tq->tq_totaltime;
2279 2279 tqsp->tqd_pri.value.ui64 = tq->tq_pri;
2280 2280
2281 2281 tqsp->tqd_hits.value.ui64 = 0;
2282 2282 tqsp->tqd_misses.value.ui64 = 0;
2283 2283 tqsp->tqd_overflows.value.ui64 = 0;
2284 2284 tqsp->tqd_tcreates.value.ui64 = 0;
2285 2285 tqsp->tqd_tdeaths.value.ui64 = 0;
2286 2286 tqsp->tqd_maxthreads.value.ui64 = 0;
2287 2287 tqsp->tqd_nomem.value.ui64 = 0;
2288 2288 tqsp->tqd_disptcreates.value.ui64 = 0;
2289 2289 tqsp->tqd_totaltime.value.ui64 = 0;
2290 2290 tqsp->tqd_nalloc.value.ui64 = 0;
2291 2291 tqsp->tqd_nfree.value.ui64 = 0;
2292 2292
2293 2293 for (; (b != NULL) && (bid < tq->tq_nbuckets); b++, bid++) {
2294 2294 tqsp->tqd_hits.value.ui64 += b->tqbucket_stat.tqs_hits;
2295 2295 tqsp->tqd_misses.value.ui64 += b->tqbucket_stat.tqs_misses;
2296 2296 tqsp->tqd_overflows.value.ui64 += b->tqbucket_stat.tqs_overflow;
2297 2297 tqsp->tqd_tcreates.value.ui64 += b->tqbucket_stat.tqs_tcreates;
2298 2298 tqsp->tqd_tdeaths.value.ui64 += b->tqbucket_stat.tqs_tdeaths;
2299 2299 tqsp->tqd_maxthreads.value.ui64 +=
2300 2300 b->tqbucket_stat.tqs_maxthreads;
2301 2301 tqsp->tqd_nomem.value.ui64 += b->tqbucket_stat.tqs_nomem;
2302 2302 tqsp->tqd_disptcreates.value.ui64 +=
2303 2303 b->tqbucket_stat.tqs_disptcreates;
2304 2304 tqsp->tqd_totaltime.value.ui64 += b->tqbucket_totaltime;
2305 2305 tqsp->tqd_nalloc.value.ui64 += b->tqbucket_nalloc;
2306 2306 tqsp->tqd_nfree.value.ui64 += b->tqbucket_nfree;
2307 2307 }
2308 2308 return (0);
2309 2309 }