illumos-gate Wdiff usr/src/uts/common/disp/thread.c

Print this page

8158 Want named threads API
9857 proc manpages should have LIBRARY section

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/disp/thread.c
          +++ new/usr/src/uts/common/disp/thread.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each

↓ open down ↓

13 lines elided

↑ open up ↑

  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  24      - * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
       24 + * Copyright (c) 2018 Joyent, Inc.
  25   25   */
  26   26  
  27   27  #include <sys/types.h>
  28   28  #include <sys/param.h>
  29   29  #include <sys/sysmacros.h>
  30   30  #include <sys/signal.h>
  31   31  #include <sys/stack.h>
  32   32  #include <sys/pcb.h>
  33   33  #include <sys/user.h>
  34   34  #include <sys/systm.h>

  35   35  #include <sys/sysinfo.h>
  36   36  #include <sys/errno.h>
  37   37  #include <sys/cmn_err.h>
  38   38  #include <sys/cred.h>
  39   39  #include <sys/resource.h>
  40   40  #include <sys/task.h>
  41   41  #include <sys/project.h>
  42   42  #include <sys/proc.h>
  43   43  #include <sys/debug.h>
  44   44  #include <sys/disp.h>
  45   45  #include <sys/class.h>
  46   46  #include <vm/seg_kmem.h>
  47   47  #include <vm/seg_kp.h>
  48   48  #include <sys/machlock.h>
  49   49  #include <sys/kmem.h>
  50   50  #include <sys/varargs.h>
  51   51  #include <sys/turnstile.h>
  52   52  #include <sys/poll.h>
  53   53  #include <sys/vtrace.h>
  54   54  #include <sys/callb.h>
  55   55  #include <c2/audit.h>
  56   56  #include <sys/tnf.h>
  57   57  #include <sys/sobject.h>
  58   58  #include <sys/cpupart.h>
  59   59  #include <sys/pset.h>
  60   60  #include <sys/door.h>
  61   61  #include <sys/spl.h>
  62   62  #include <sys/copyops.h>
  63   63  #include <sys/rctl.h>
  64   64  #include <sys/brand.h>
  65   65  #include <sys/pool.h>
  66   66  #include <sys/zone.h>

↓ open down ↓

32 lines elided

↑ open up ↑

  67   67  #include <sys/tsol/label.h>
  68   68  #include <sys/tsol/tndb.h>
  69   69  #include <sys/cpc_impl.h>
  70   70  #include <sys/sdt.h>
  71   71  #include <sys/reboot.h>
  72   72  #include <sys/kdi.h>
  73   73  #include <sys/schedctl.h>
  74   74  #include <sys/waitq.h>
  75   75  #include <sys/cpucaps.h>
  76   76  #include <sys/kiconv.h>
       77 +#include <sys/ctype.h>
  77   78  
  78   79  struct kmem_cache *thread_cache;        /* cache of free threads */
  79   80  struct kmem_cache *lwp_cache;           /* cache of free lwps */
  80   81  struct kmem_cache *turnstile_cache;     /* cache of free turnstiles */
  81   82  
  82   83  /*
  83   84   * allthreads is only for use by kmem_readers.  All kernel loops can use
  84   85   * the current thread as a start/end point.
  85   86   */
  86   87  kthread_t *allthreads = &t0;    /* circular list of all threads */

  87   88  
  88   89  static kcondvar_t reaper_cv;            /* synchronization var */
  89   90  kthread_t       *thread_deathrow;       /* circular list of reapable threads */
  90   91  kthread_t       *lwp_deathrow;          /* circular list of reapable threads */
  91   92  kmutex_t        reaplock;               /* protects lwp and thread deathrows */
  92   93  int     thread_reapcnt = 0;             /* number of threads on deathrow */
  93   94  int     lwp_reapcnt = 0;                /* number of lwps on deathrow */
  94   95  int     reaplimit = 16;                 /* delay reaping until reaplimit */
  95   96  
  96   97  thread_free_lock_t      *thread_free_lock;
  97   98                                          /* protects tick thread from reaper */
  98   99  
  99  100  extern int nthread;
 100  101  
 101  102  /* System Scheduling classes. */
 102  103  id_t    syscid;                         /* system scheduling class ID */
 103  104  id_t    sysdccid = CLASS_UNUSED;        /* reset when SDC loads */
 104  105  
 105  106  void    *segkp_thread;                  /* cookie for segkp pool */
 106  107  
  107  108  int lwp_cache_sz = 32;  /* passed to segkp_cache_init() for segkp_lwp */
  108  109  int t_cache_sz = 8;     /* passed to segkp_cache_init() for segkp_thread */
  109  110  static kt_did_t next_t_id = 1;  /* next t_did; bumped in thread_create() */
 110  111  
 111  112  /* Default mode for thread binding to CPUs and processor sets */
 112  113  int default_binding_mode = TB_ALLHARD;
 113  114  
 114  115  /*
 115  116   * Min/Max stack sizes for stack size parameters
 116  117   */
 117  118  #define MAX_STKSIZE     (32 * DEFAULTSTKSZ)
 118  119  #define MIN_STKSIZE     DEFAULTSTKSZ
 119  120  
 120  121  /*
 121  122   * default_stksize overrides lwp_default_stksize if it is set.
 122  123   */
 123  124  int     default_stksize;
 124  125  int     lwp_default_stksize;
 125  126  
 126  127  static zone_key_t zone_thread_key;
 127  128  
 128  129  unsigned int kmem_stackinfo;            /* stackinfo feature on-off */
 129  130  kmem_stkinfo_t *kmem_stkinfo_log;       /* stackinfo circular log */
 130  131  static kmutex_t kmem_stkinfo_lock;      /* protects kmem_stkinfo_log */
 131  132  
 132  133  /*
 133  134   * forward declarations for internal thread specific data (tsd)
 134  135   */
 135  136  static void *tsd_realloc(void *, size_t, size_t);
 136  137  
 137  138  void thread_reaper(void);
 138  139  
 139  140  /* forward declarations for stackinfo feature */
 140  141  static void stkinfo_begin(kthread_t *);
 141  142  static void stkinfo_end(kthread_t *);
 142  143  static size_t stkinfo_percent(caddr_t, caddr_t, caddr_t);
  143  144  
            /*
             * kmem cache constructor registered for turnstile_cache in
             * thread_init().  Zero-fills the turnstile_t at buf and always
             * returns 0.  cdrarg and kmflags are not used.
             */
  144  145  /*ARGSUSED*/
  145  146  static int
  146  147  turnstile_constructor(void *buf, void *cdrarg, int kmflags)
  147  148  {
  148  149          bzero(buf, sizeof (turnstile_t));
  149  150          return (0);
  150  151  }
  151  152  
            /*
             * kmem cache destructor registered for turnstile_cache in
             * thread_init().  It releases nothing; it only asserts that the
             * turnstile being destroyed is idle: ts_free and ts_inheritor are
             * NULL, ts_waiters is zero, and both sleep queues are empty.
             * cdrarg is not used.
             */
  152  153  /*ARGSUSED*/
  153  154  static void
  154  155  turnstile_destructor(void *buf, void *cdrarg)
  155  156  {
  156  157          turnstile_t *ts = buf;
  157  158  
  158  159          ASSERT(ts->ts_free == NULL);
  159  160          ASSERT(ts->ts_waiters == 0);
  160  161          ASSERT(ts->ts_inheritor == NULL);
  161  162          ASSERT(ts->ts_sleepq[0].sq_first == NULL);
  162  163          ASSERT(ts->ts_sleepq[1].sq_first == NULL);
  163  164  }
  164  165  
            /*
             * Initialize the thread subsystem.
             *
             * In order, this routine:
             *   - initializes reaplock and the thread_free_lock array;
             *   - creates the thread, lwp and turnstile kmem caches;
             *   - calls label_init(), cred_init(), rctl_init(), cpucaps_init(),
             *     zone_init(), project_init(), brand_init(), kiconv_init(),
             *     task_init(), tcache_init() and pool_init();
             *   - gives the current thread (t0) a turnstile;
             *   - validates default_stksize and lwp_default_stksize, falling
             *     back to defaults (with a warning) when a value is illegal;
             *   - creates the segkp stack caches segkp_lwp and segkp_thread;
             *   - looks up the system scheduling class ID (syscid);
             *   - creates the first CPU's idle thread and the thread_reaper
             *     daemon;
             *   - calls kmem_thread_init(), and kdi_dvec_thravail() when
             *     RB_DEBUG is set in boothowto.
             */
  165  166  void
  166  167  thread_init(void)
  167  168  {
  168  169          kthread_t *tp;
  169  170          extern char sys_name[];
  170  171          extern void idle();
  171  172          struct cpu *cpu = CPU;
  172  173          int i;
  173  174          kmutex_t *lp;
  174  175  
  175  176          mutex_init(&reaplock, NULL, MUTEX_SPIN, (void *)ipltospl(DISP_LEVEL));
  176  177          thread_free_lock =
  177  178              kmem_alloc(sizeof (thread_free_lock_t) * THREAD_FREE_NUM, KM_SLEEP);
  178  179          for (i = 0; i < THREAD_FREE_NUM; i++) {
  179  180                  lp = &thread_free_lock[i].tf_lock;
  180  181                  mutex_init(lp, NULL, MUTEX_DEFAULT, NULL);
  181  182          }
  182  183  
  183  184  #if defined(__i386) || defined(__amd64)
  184  185          thread_cache = kmem_cache_create("thread_cache", sizeof (kthread_t),
  185  186              PTR24_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
  186  187  
  187  188          /*
  188  189           * "struct _klwp" includes a "struct pcb", which includes a
  189  190           * "struct fpu", which needs to be 64-byte aligned on amd64
  190  191           * (and even on i386) for xsave/xrstor.
  191  192           */
  192  193          lwp_cache = kmem_cache_create("lwp_cache", sizeof (klwp_t),
  193  194              64, NULL, NULL, NULL, NULL, NULL, 0);
  194  195  #else
  195  196          /*
  196  197           * Allocate thread structures from static_arena.  This prevents
  197  198           * issues where a thread tries to relocate its own thread
  198  199           * structure and touches it after the mapping has been suspended.
  199  200           */
  200  201          thread_cache = kmem_cache_create("thread_cache", sizeof (kthread_t),
  201  202              PTR24_ALIGN, NULL, NULL, NULL, NULL, static_arena, 0);
  202  203  
  203  204          lwp_stk_cache_init();
  204  205  
  205  206          lwp_cache = kmem_cache_create("lwp_cache", sizeof (klwp_t),
  206  207              0, NULL, NULL, NULL, NULL, NULL, 0);
  207  208  #endif
  208  209  
  209  210          turnstile_cache = kmem_cache_create("turnstile_cache",
  210  211              sizeof (turnstile_t), 0,
  211  212              turnstile_constructor, turnstile_destructor, NULL, NULL, NULL, 0);
  212  213  
  213  214          label_init();
  214  215          cred_init();
  215  216  
  216  217          /*
  217  218           * Initialize various resource management facilities.
  218  219           */
  219  220          rctl_init();
  220  221          cpucaps_init();
  221  222          /*
  222  223           * Zone_init() should be called before project_init() so that project ID
  223  224           * for the first project is initialized correctly.
  224  225           */
  225  226          zone_init();
  226  227          project_init();
  227  228          brand_init();
  228  229          kiconv_init();
  229  230          task_init();
  230  231          tcache_init();
  231  232          pool_init();
  232  233  
  233  234          curthread->t_ts = kmem_cache_alloc(turnstile_cache, KM_SLEEP);
  234  235  
  235  236          /*
  236  237           * Originally, we had two parameters to set default stack
  237  238           * size: one for lwp's (lwp_default_stksize), and one for
  238  239           * kernel-only threads (DEFAULTSTKSZ, a.k.a. _defaultstksz).
  239  240           * Now we have a third parameter that overrides both if it is
  240  241           * set to a legal stack size, called default_stksize.
  241  242           */
  242  243  
  243  244          if (default_stksize == 0) {
  244  245                  default_stksize = DEFAULTSTKSZ;
  245  246          } else if (default_stksize % PAGESIZE != 0 ||
  246  247              default_stksize > MAX_STKSIZE ||
  247  248              default_stksize < MIN_STKSIZE) {
  248  249                  cmn_err(CE_WARN, "Illegal stack size. Using %d",
  249  250                      (int)DEFAULTSTKSZ);
  250  251                  default_stksize = DEFAULTSTKSZ;
  251  252          } else {
                            /* a legal explicit default_stksize also sets the lwp size */
  252  253                  lwp_default_stksize = default_stksize;
  253  254          }
  254  255  
                    /*
                     * An unset or illegal lwp_default_stksize falls back to
                     * default_stksize, which is known to be valid by now.
                     */
  255  256          if (lwp_default_stksize == 0) {
  256  257                  lwp_default_stksize = default_stksize;
  257  258          } else if (lwp_default_stksize % PAGESIZE != 0 ||
  258  259              lwp_default_stksize > MAX_STKSIZE ||
  259  260              lwp_default_stksize < MIN_STKSIZE) {
  260  261                  cmn_err(CE_WARN, "Illegal stack size. Using %d",
  261  262                      default_stksize);
  262  263                  lwp_default_stksize = default_stksize;
  263  264          }
  264  265  
  265  266          segkp_lwp = segkp_cache_init(segkp, lwp_cache_sz,
  266  267              lwp_default_stksize,
  267  268              (KPD_NOWAIT | KPD_HASREDZONE | KPD_LOCKED));
  268  269  
  269  270          segkp_thread = segkp_cache_init(segkp, t_cache_sz,
  270  271              default_stksize, KPD_HASREDZONE | KPD_LOCKED | KPD_NO_ANON);
  271  272  
  272  273          (void) getcid(sys_name, &syscid);
  273  274          curthread->t_cid = syscid;      /* current thread is t0 */
  274  275  
  275  276          /*
  276  277           * Set up the first CPU's idle thread.
  277  278           * It runs whenever the CPU has nothing worthwhile to do.
  278  279           */
  279  280          tp = thread_create(NULL, 0, idle, NULL, 0, &p0, TS_STOPPED, -1);
  280  281          cpu->cpu_idle_thread = tp;
  281  282          tp->t_preempt = 1;
  282  283          tp->t_disp_queue = cpu->cpu_disp;
  283  284          ASSERT(tp->t_disp_queue != NULL);
  284  285          tp->t_bound_cpu = cpu;
  285  286          tp->t_affinitycnt = 1;
  286  287  
  287  288          /*
  288  289           * Registering a thread in the callback table is usually
  289  290           * done in the initialization code of the thread. In this
  290  291           * case, we do it right after thread creation to avoid
  291  292           * blocking idle thread while registering itself. It also
  292  293           * avoids the possibility of reregistration in case a CPU
  293  294           * restarts its idle thread.
  294  295           */
  295  296          CALLB_CPR_INIT_SAFE(tp, "idle");
  296  297  
  297  298          /*
  298  299           * Create the thread_reaper daemon. From this point on, exited
  299  300           * threads will get reaped.
  300  301           */
  301  302          (void) thread_create(NULL, 0, (void (*)())thread_reaper,
  302  303              NULL, 0, &p0, TS_RUN, minclsyspri);
  303  304  
  304  305          /*
  305  306           * Finish initializing the kernel memory allocator now that
  306  307           * thread_create() is available.
  307  308           */
  308  309          kmem_thread_init();
  309  310  
  310  311          if (boothowto & RB_DEBUG)
  311  312                  kdi_dvec_thravail();
  312  313  }
 313  314  
  314  315  /*
  315  316   * Create a thread.
  316  317   *
  317  318   * thread_create() blocks for memory if necessary.  It never fails.
  318  319   *
  319  320   * If stk is NULL, the thread is created at the base of the stack
  320  321   * and cannot be swapped.
             *
             * Arguments:
             *   stk      caller-supplied stack, or NULL.  With NULL, the stack
             *            and the kthread_t are carved out of one segkp
             *            allocation and T_TALLOCSTK is set; otherwise the
             *            kthread_t comes from thread_cache.
             *   stksize  stack size.  With a NULL stk, a value below
             *            default_stksize is raised to it and any other value
             *            is rounded up to a PAGESIZE multiple; the routine
             *            panics if the result cannot hold a kthread_t.
             *   proc     start routine, saved in t_startpc.  If NULL, the
             *            caller must initialize the stack itself.
             *   arg,len  handed to thread_load() together with proc.
             *   pp       owning process; supplies t_procp, t_plockp and, if
             *            p_cred is set, a held reference for t_cred.
             *   state    initial state: TS_RUN, TS_ONPROC, TS_FREE or
             *            TS_STOPPED.  Any other value panics.
             *   pri      initial value of t_pri.
             *
             * Returns the new thread, already linked onto the circular thread
             * list just before curthread.
  321  322   */
  322  323  kthread_t *
  323  324  thread_create(
  324  325          caddr_t stk,
  325  326          size_t  stksize,
  326  327          void    (*proc)(),
  327  328          void    *arg,
  328  329          size_t  len,
  329  330          proc_t   *pp,
  330  331          int     state,
  331  332          pri_t   pri)
  332  333  {
  333  334          kthread_t *t;
  334  335          extern struct classfuncs sys_classfuncs;
  335  336          turnstile_t *ts;
  336  337  
  337  338          /*
  338  339           * Every thread keeps a turnstile around in case it needs to block.
  339  340           * The only reason the turnstile is not simply part of the thread
  340  341           * structure is that we may have to break the association whenever
  341  342           * more than one thread blocks on a given synchronization object.
  342  343           * From a memory-management standpoint, turnstiles are like the
  343  344           * "attached mblks" that hang off dblks in the streams allocator.
  344  345           */
  345  346          ts = kmem_cache_alloc(turnstile_cache, KM_SLEEP);
  346  347  
  347  348          if (stk == NULL) {
  348  349                  /*
  349  350                   * alloc both thread and stack in segkp chunk
  350  351                   */
  351  352  
  352  353                  if (stksize < default_stksize)
  353  354                          stksize = default_stksize;
  354  355  
                            /* default-sized stacks come from the segkp_thread cache */
  355  356                  if (stksize == default_stksize) {
  356  357                          stk = (caddr_t)segkp_cache_get(segkp_thread);
  357  358                  } else {
  358  359                          stksize = roundup(stksize, PAGESIZE);
  359  360                          stk = (caddr_t)segkp_get(segkp, stksize,
  360  361                              (KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED));
  361  362                  }
  362  363  
  363  364                  ASSERT(stk != NULL);
  364  365  
  365  366                  /*
  366  367                   * The machine-dependent mutex code may require that
  367  368                   * thread pointers (since they may be used for mutex owner
  368  369                   * fields) have certain alignment requirements.
  369  370                   * PTR24_ALIGN is the size of the alignment quanta.
  370  371                   * XXX - assumes stack grows toward low addresses.
  371  372                   */
  372  373                  if (stksize <= sizeof (kthread_t) + PTR24_ALIGN)
  373  374                          cmn_err(CE_PANIC, "thread_create: proposed stack size"
  374  375                              " too small to hold thread.");
  375  376  #ifdef STACK_GROWTH_DOWN
                            /* place the kthread_t at the high end of the allocation */
  376  377                  stksize -= SA(sizeof (kthread_t) + PTR24_ALIGN - 1);
  377  378                  stksize &= -PTR24_ALIGN;        /* make thread aligned */
  378  379                  t = (kthread_t *)(stk + stksize);
  379  380                  bzero(t, sizeof (kthread_t));
  380  381                  if (audit_active)
  381  382                          audit_thread_create(t);
  382  383                  t->t_stk = stk + stksize;
  383  384                  t->t_stkbase = stk;
  384  385  #else   /* stack grows to larger addresses */
  385  386                  stksize -= SA(sizeof (kthread_t));
  386  387                  t = (kthread_t *)(stk);
  387  388                  bzero(t, sizeof (kthread_t));
  388  389                  t->t_stk = stk + sizeof (kthread_t);
  389  390                  t->t_stkbase = stk + stksize + sizeof (kthread_t);
  390  391  #endif  /* STACK_GROWTH_DOWN */
  391  392                  t->t_flag |= T_TALLOCSTK;
  392  393                  t->t_swap = stk;
  393  394          } else {
  394  395                  t = kmem_cache_alloc(thread_cache, KM_SLEEP);
  395  396                  bzero(t, sizeof (kthread_t));
  396  397                  ASSERT(((uintptr_t)t & (PTR24_ALIGN - 1)) == 0);
  397  398                  if (audit_active)
  398  399                          audit_thread_create(t);
  399  400                  /*
  400  401                   * Initialize t_stk to the kernel stack pointer to use
  401  402                   * upon entry to the kernel
  402  403                   */
  403  404  #ifdef STACK_GROWTH_DOWN
  404  405                  t->t_stk = stk + stksize;
  405  406                  t->t_stkbase = stk;
  406  407  #else
  407  408                  t->t_stk = stk;                 /* 3b2-like */
  408  409                  t->t_stkbase = stk + stksize;
  409  410  #endif /* STACK_GROWTH_DOWN */
  410  411          }
  411  412  
  412  413          if (kmem_stackinfo != 0) {
  413  414                  stkinfo_begin(t);
  414  415          }
  415  416  
  416  417          t->t_ts = ts;
  417  418  
  418  419          /*
  419  420           * p_cred could be NULL if thread_create is called before cred_init
  420  421           * is called in main.
  421  422           */
  422  423          mutex_enter(&pp->p_crlock);
  423  424          if (pp->p_cred)
  424  425                  crhold(t->t_cred = pp->p_cred);
  425  426          mutex_exit(&pp->p_crlock);
  426  427          t->t_start = gethrestime_sec();
  427  428          t->t_startpc = proc;
  428  429          t->t_procp = pp;
  429  430          t->t_clfuncs = &sys_classfuncs.thread;
  430  431          t->t_cid = syscid;
  431  432          t->t_pri = pri;
  432  433          t->t_stime = ddi_get_lbolt();
  433  434          t->t_schedflag = TS_LOAD | TS_DONT_SWAP;
  434  435          t->t_bind_cpu = PBIND_NONE;
  435  436          t->t_bindflag = (uchar_t)default_binding_mode;
  436  437          t->t_bind_pset = PS_NONE;
  437  438          t->t_plockp = &pp->p_lock;
  438  439          t->t_copyops = NULL;
  439  440          t->t_taskq = NULL;
  440  441          t->t_anttime = 0;
  441  442          t->t_hatdepth = 0;
  442  443  
  443  444          t->t_dtrace_vtime = 1;  /* assure vtimestamp is always non-zero */
  444  445  
  445  446          CPU_STATS_ADDQ(CPU, sys, nthreads, 1);
  446  447  #ifndef NPROBE
  447  448          /* Kernel probe */
  448  449          tnf_thread_create(t);
  449  450  #endif /* NPROBE */
  450  451          LOCK_INIT_CLEAR(&t->t_lock);
  451  452  
  452  453          /*
  453  454           * Callers who give us a NULL proc must do their own
  454  455           * stack initialization.  e.g. lwp_create()
  455  456           */
  456  457          if (proc != NULL) {
  457  458                  t->t_stk = thread_stk_init(t->t_stk);
  458  459                  thread_load(t, proc, arg, len);
  459  460          }
  460  461  
  461  462          /*
  462  463           * Put a hold on project0. If this thread is actually in a
  463  464           * different project, then t_proj will be changed later in
  464  465           * lwp_create().  All kernel-only threads must be in project 0.
  465  466           */
  466  467          t->t_proj = project_hold(proj0p);
  467  468  
  468  469          lgrp_affinity_init(&t->t_lgrp_affinity);
  469  470  
                    /* pidlock is held from here until the initial state is set */
  470  471          mutex_enter(&pidlock);
  471  472          nthread++;
  472  473          t->t_did = next_t_id++;
  473  474          t->t_prev = curthread->t_prev;
  474  475          t->t_next = curthread;
  475  476  
  476  477          /*
  477  478           * Add the thread to the list of all threads, and initialize
  478  479           * its t_cpu pointer.  We need to block preemption since
  479  480           * cpu_offline walks the thread list looking for threads
  480  481           * with t_cpu pointing to the CPU being offlined.  We want
  481  482           * to make sure that the list is consistent and that if t_cpu
  482  483           * is set, the thread is on the list.
  483  484           */
  484  485          kpreempt_disable();
  485  486          curthread->t_prev->t_next = t;
  486  487          curthread->t_prev = t;
  487  488  
  488  489          /*
  489  490           * Threads should never have a NULL t_cpu pointer so assign it
  490  491           * here.  If the thread is being created with state TS_RUN a
  491  492           * better CPU may be chosen when it is placed on the run queue.
  492  493           *
  493  494           * We need to keep kernel preemption disabled when setting all
  494  495           * three fields to keep them in sync.  Also, always create in
  495  496           * the default partition since that's where kernel threads go
  496  497           * (if this isn't a kernel thread, t_cpupart will be changed
  497  498           * in lwp_create before setting the thread runnable).
  498  499           */
  499  500          t->t_cpupart = &cp_default;
  500  501  
  501  502          /*
  502  503           * For now, affiliate this thread with the root lgroup.
  503  504           * Since the kernel does not (presently) allocate its memory
  504  505           * in a locality aware fashion, the root is an appropriate home.
  505  506           * If this thread is later associated with an lwp, it will have
  506  507           * its lgroup re-assigned at that time.
  507  508           */
  508  509          lgrp_move_thread(t, &cp_default.cp_lgrploads[LGRP_ROOTID], 1);
  509  510  
  510  511          /*
  511  512           * Inherit the current cpu.  If this cpu isn't part of the chosen
  512  513           * lgroup, a new cpu will be chosen by cpu_choose when the thread
  513  514           * is ready to run.
  514  515           */
  515  516          if (CPU->cpu_part == &cp_default)
  516  517                  t->t_cpu = CPU;
  517  518          else
  518  519                  t->t_cpu = disp_lowpri_cpu(cp_default.cp_cpulist, t->t_lpl,
  519  520                      t->t_pri, NULL);
  520  521  
  521  522          t->t_disp_queue = t->t_cpu->cpu_disp;
  522  523          kpreempt_enable();
  523  524  
  524  525          /*
  525  526           * Initialize thread state and the dispatcher lock pointer.
  526  527           * Need to hold onto pidlock to block allthreads walkers until
  527  528           * the state is set.
  528  529           */
  529  530          switch (state) {
  530  531          case TS_RUN:
  531  532                  curthread->t_oldspl = splhigh();        /* get dispatcher spl */
  532  533                  THREAD_SET_STATE(t, TS_STOPPED, &transition_lock);
  533  534                  CL_SETRUN(t);
  534  535                  thread_unlock(t);
  535  536                  break;
  536  537  
  537  538          case TS_ONPROC:
  538  539                  THREAD_ONPROC(t, t->t_cpu);
  539  540                  break;
  540  541  
  541  542          case TS_FREE:
  542  543                  /*
  543  544                   * Free state will be used for intr threads.
  544  545                   * The interrupt routine must set the thread dispatcher
  545  546                   * lock pointer (t_lockp) if starting on a CPU
  546  547                   * other than the current one.
  547  548                   */
  548  549                  THREAD_FREEINTR(t, CPU);
  549  550                  break;
  550  551  
  551  552          case TS_STOPPED:
  552  553                  THREAD_SET_STATE(t, TS_STOPPED, &stop_lock);
  553  554                  break;
  554  555  
  555  556          default:                        /* TS_SLEEP, TS_ZOMB or TS_TRANS */
  556  557                  cmn_err(CE_PANIC, "thread_create: invalid state %d", state);
  557  558          }
  558  559          mutex_exit(&pidlock);
  559  560          return (t);
  560  561  }
 561  562  
  562  563  /*
  563  564   * Move thread to project0 and take care of project reference counters.
             *
             * t_proj is switched to proj0p while the thread lock is held.  If
             * the thread was in some other project, that project's hold is
             * dropped and a hold on proj0p is taken in its place; both are
             * done after the thread lock has been released.
  564  565   */
  565  566  void
  566  567  thread_rele(kthread_t *t)
  567  568  {
  568  569          kproject_t *kpj;
  569  570  
  570  571          thread_lock(t);
  571  572  
  572  573          ASSERT(t == curthread || t->t_state == TS_FREE || t->t_procp == &p0);
  573  574          kpj = ttoproj(t);
  574  575          t->t_proj = proj0p;
  575  576  
  576  577          thread_unlock(t);
  577  578  
                    /* a thread already in project0 keeps its existing hold */
  578  579          if (kpj != proj0p) {
  579  580                  project_rele(kpj);
  580  581                  (void) project_hold(proj0p);
  581  582          }
  582  583  }
  583  584  
            /*
             * Terminate the calling thread (curthread).
             *
             * Panics if the thread has TP_ZTHREAD set.  Otherwise it cleans up
             * the thread's TSD and performance counter state, slams any door,
             * moves the thread to project0, unlinks it from the circular thread
             * list under pidlock (waking thread_join() waiters), runs the
             * thread and process exit context hooks when present, marks the
             * thread TS_ZOMB and gives up the CPU.  Control does not return.
             */
  584  585  void
  585  586  thread_exit(void)
  586  587  {
  587  588          kthread_t *t = curthread;
  588  589  
  589  590          if ((t->t_proc_flag & TP_ZTHREAD) != 0)
  590  591                  cmn_err(CE_PANIC, "thread_exit: zthread_exit() not called");
  591  592  
  592  593          tsd_exit();             /* Clean up this thread's TSD */
  593  594  
  594  595          kcpc_passivate();       /* clean up performance counter state */
  595  596  
  596  597          /*
  597  598           * No kernel thread should have called poll() without arranging
  598  599           * to call pollcleanup() here.
  599  600           */
  600  601          ASSERT(t->t_pollstate == NULL);
  601  602          ASSERT(t->t_schedctl == NULL);
  602  603          if (t->t_door)
  603  604                  door_slam();    /* in case thread did an upcall */
  604  605  
  605  606  #ifndef NPROBE
  606  607          /* Kernel probe */
  607  608          if (t->t_tnf_tpdp)
  608  609                  tnf_thread_exit();
  609  610  #endif /* NPROBE */
  610  611  
  611  612          thread_rele(t);
  612  613          t->t_preempt++;
  613  614  
  614  615          /*
  615  616           * remove thread from the all threads list so that
  616  617           * death-row can use the same pointers.
  617  618           */
  618  619          mutex_enter(&pidlock);
  619  620          t->t_next->t_prev = t->t_prev;
  620  621          t->t_prev->t_next = t->t_next;
  621  622          ASSERT(allthreads != t);        /* t0 never exits */
  622  623          cv_broadcast(&t->t_joincv);     /* wake up anyone in thread_join */
  623  624          mutex_exit(&pidlock);
  624  625  
  625  626          if (t->t_ctx != NULL)
  626  627                  exitctx(t);
  627  628          if (t->t_procp->p_pctx != NULL)
  628  629                  exitpctx(t->t_procp);
  629  630  
  630  631          if (kmem_stackinfo != 0) {
  631  632                  stkinfo_end(t);
  632  633          }
  633  634  
  634  635          t->t_state = TS_ZOMB;   /* set zombie thread */
  635  636  
  636  637          swtch_from_zombie();    /* give up the CPU */
  637  638          /* NOTREACHED */
  638  639  }
 639  640  
  640  641  /*
  641  642   * Check to see if the specified thread is active (defined as being on
  642  643   * the thread list).  This is certainly a slow way to do this; if there's
  643  644   * ever a reason to speed it up, we could maintain a hash table of active
  644  645   * threads indexed by their t_did.
             *
             * Returns the thread whose t_did equals tid, or NULL if no thread
             * on the list matches.  The caller must hold pidlock (asserted).
  645  646   */
  646  647  static kthread_t *
  647  648  did_to_thread(kt_did_t tid)
  648  649  {
  649  650          kthread_t *t;
  650  651  
  651  652          ASSERT(MUTEX_HELD(&pidlock));
                    /* walk the circular list, starting just after curthread */
  652  653          for (t = curthread->t_next; t != curthread; t = t->t_next) {
  653  654                  if (t->t_did == tid)
  654  655                          break;
  655  656          }
                    /*
                     * If the walk found nothing, t is curthread here, so this
                     * test also covers a tid that names the calling thread.
                     */
  656  657          if (t->t_did == tid)
  657  658                  return (t);
  658  659          else
  659  660                  return (NULL);
  660  661  }
 661  662  
  662  663  /*
  663  664   * Wait for specified thread to exit.  Returns immediately if the thread
  664  665   * could not be found, meaning that it has either already exited or never
  665  666   * existed.
             *
             * tid must not name the calling thread or t0; both are asserted.
  666  667   */
  667  668  void
  668  669  thread_join(kt_did_t tid)
  669  670  {
  670  671          kthread_t *t;
  671  672  
  672  673          ASSERT(tid != curthread->t_did);
  673  674          ASSERT(tid != t0.t_did);
  674  675  
  675  676          mutex_enter(&pidlock);
  676  677          /*
  677  678           * Make sure we check that the thread is on the thread list
  678  679           * before blocking on it; otherwise we could end up blocking on
  679  680           * a cv that's already been freed.  In other words, don't cache
  680  681           * the thread pointer across calls to cv_wait.
  681  682           *
  682  683           * The choice of loop invariant means that whenever a thread
  683  684           * is taken off the allthreads list, a cv_broadcast must be
  684  685           * performed on that thread's t_joincv to wake up any waiters.
  685  686           * The broadcast doesn't have to happen right away, but it
  686  687           * shouldn't be postponed indefinitely (e.g., by doing it in
  687  688           * thread_free which may only be executed when the deathrow
  688  689           * queue is processed).
  689  690           */
                    /* re-run the lookup after every wakeup; loop ends once tid is gone */
  690  691          while (t = did_to_thread(tid))
  691  692                  cv_wait(&t->t_joincv, &pidlock);
  692  693          mutex_exit(&pidlock);
  693  694  }
  694  695  
            /*
             * Acquire the thread_free_lock mutex that THREAD_FREE_HASH(t)
             * selects for t, and return with it held.  thread_free_allow()
             * on the same thread releases it.
             * NOTE(review): presumably this keeps t from being freed while
             * the lock is held, via thread_free_barrier(); the barrier's
             * caller is not visible here -- confirm.
             */
  695  696  void
  696  697  thread_free_prevent(kthread_t *t)
  697  698  {
  698  699          kmutex_t *lp;
  699  700  
  700  701          lp = &thread_free_lock[THREAD_FREE_HASH(t)].tf_lock;
  701  702          mutex_enter(lp);
  702  703  }
  703  704  
            /*
             * Release the thread_free_lock mutex that THREAD_FREE_HASH(t)
             * selects for t; the counterpart of thread_free_prevent().
             */
  704  705  void
  705  706  thread_free_allow(kthread_t *t)
  706  707  {
  707  708          kmutex_t *lp;
  708  709  
  709  710          lp = &thread_free_lock[THREAD_FREE_HASH(t)].tf_lock;
  710  711          mutex_exit(lp);
  711  712  }
  712  713  
            /*
             * Enter and immediately exit the thread_free_lock mutex that
             * THREAD_FREE_HASH(t) selects for t.  Because the mutex must be
             * acquired, this returns only once no other thread holds it, e.g.
             * after a thread_free_prevent() holder has called
             * thread_free_allow().
             */
  713  714  static void
  714  715  thread_free_barrier(kthread_t *t)
  715  716  {
  716  717          kmutex_t *lp;
  717  718  
  718  719          lp = &thread_free_lock[THREAD_FREE_HASH(t)].tf_lock;
  719  720          mutex_enter(lp);
  720  721          mutex_exit(lp);
  721  722  }
 722  723  
 723  724  void
 724  725  thread_free(kthread_t *t)
 725  726  {
                   /*
                    * Tear down a dead (TS_FREE) thread: release the resources
                    * still attached to the kthread_t, then give back the stack
                    * segment (t_swap), the lwp and finally the thread structure.
                    *
                    * The three values below are captured up front because the
                    * corresponding fields are cleared before the final frees, and
                    * because t itself may be part of the stack that is released
                    * (T_TALLOCSTK).
                    */
 726  727          boolean_t allocstk = (t->t_flag & T_TALLOCSTK);
 727  728          klwp_t *lwp = t->t_lwp;
 728  729          caddr_t swap = t->t_swap;
 729  730  
 730  731          ASSERT(t != &t0 && t->t_state == TS_FREE);
 731  732          ASSERT(t->t_door == NULL);
 732  733          ASSERT(t->t_schedctl == NULL);
 733  734          ASSERT(t->t_pollstate == NULL);
 734  735  
 735  736          t->t_pri = 0;
 736  737          t->t_pc = 0;
 737  738          t->t_sp = 0;
 738  739          t->t_wchan0 = NULL;
 739  740          t->t_wchan = NULL;
 740  741          if (t->t_cred != NULL) {
 741  742                  crfree(t->t_cred);
 742  743                  t->t_cred = 0;
 743  744          }
                   /* t_pdmsg is freed as a NUL-terminated string: length + 1. */
 744  745          if (t->t_pdmsg) {
 745  746                  kmem_free(t->t_pdmsg, strlen(t->t_pdmsg) + 1);
 746  747                  t->t_pdmsg = NULL;
 747  748          }
 748  749          if (audit_active)
 749  750                  audit_thread_free(t);
 750  751  #ifndef NPROBE
 751  752          if (t->t_tnf_tpdp)
 752  753                  tnf_thread_free(t);
 753  754  #endif /* NPROBE */
 754  755          if (t->t_cldata) {
 755  756                  CL_EXITCLASS(t->t_cid, (caddr_t *)t->t_cldata);
 756  757          }
 757  758          if (t->t_rprof != NULL) {
 758  759                  kmem_free(t->t_rprof, sizeof (*t->t_rprof));
 759  760                  t->t_rprof = NULL;
 760  761          }
 761  762          t->t_lockp = NULL;      /* nothing should try to lock this thread now */
 762  763          if (lwp)
 763  764                  lwp_freeregs(lwp, 0);
                   /* Run and discard any remaining context ops (not an exec). */
 764  765          if (t->t_ctx)
 765  766                  freectx(t, 0);
 766  767          t->t_stk = NULL;
 767  768          if (lwp)
 768  769                  lwp_stk_fini(lwp);
 769  770          lock_clear(&t->t_lock);
 770  771  
                   /* A turnstile with waiters at this point is fatal. */
 771  772          if (t->t_ts->ts_waiters > 0)
 772  773                  panic("thread_free: turnstile still active");
 773  774  
 774  775          kmem_cache_free(turnstile_cache, t->t_ts);
 775  776  
 776  777          free_afd(&t->t_activefd);
 777  778  
 778  779          /*
 779  780           * Barrier for the tick accounting code.  The tick accounting code
 780  781           * holds this lock to keep the thread from going away while it's
 781  782           * looking at it.
 782  783           */
 783  784          thread_free_barrier(t);

↓ open down ↓

697 lines elided

↑ open up ↑

 784  785  
 785  786          ASSERT(ttoproj(t) == proj0p);
 786  787          project_rele(ttoproj(t));
 787  788  
 788  789          lgrp_affinity_free(&t->t_lgrp_affinity);
 789  790  
                   /* nthread is adjusted under pidlock. */
 790  791          mutex_enter(&pidlock);
 791  792          nthread--;
 792  793          mutex_exit(&pidlock);
 793  794  
                   /*
                    * The name buffer is released with a fixed size of
                    * THREAD_NAME_MAX; presumably it is always allocated at that
                    * size -- confirm against the code that sets t_name.
                    */
      795 +        if (t->t_name != NULL) {
      796 +                kmem_free(t->t_name, THREAD_NAME_MAX);
      797 +                t->t_name = NULL;
      798 +        }
      799 +
 794  800          /*
 795  801           * Free thread, lwp and stack.  This needs to be done carefully, since
 796  802           * if T_TALLOCSTK is set, the thread is part of the stack.
 797  803           */
 798  804          t->t_lwp = NULL;
 799  805          t->t_swap = NULL;
 800  806  
 801  807          if (swap) {
 802  808                  segkp_release(segkp, swap);
 803  809          }

 804  810          if (lwp) {
 805  811                  kmem_cache_free(lwp_cache, lwp);
 806  812          }
                   /*
                    * With T_TALLOCSTK the kthread_t is part of the stack (see
                    * above), so it is only returned to thread_cache when that
                    * flag was not set.
                    */
 807  813          if (!allocstk) {
 808  814                  kmem_cache_free(thread_cache, t);
 809  815          }
 810  816  }
 811  817  
 812  818  /*
 813  819   * Removes threads associated with the given zone from a deathrow queue.
 814  820   * tp is a pointer to the head of the deathrow queue, and countp is a
 815  821   * pointer to the current deathrow count.  Returns a linked list of
 816  822   * threads removed from the list.
 817  823   */
 818  824  static kthread_t *
 819  825  thread_zone_cleanup(kthread_t **tp, int *countp, zoneid_t zoneid)
 820  826  {
 821  827          kthread_t *tmp, *list = NULL;
 822  828          cred_t *cr;
 823  829  
 824  830          ASSERT(MUTEX_HELD(&reaplock));
 825  831          while (*tp != NULL) {
 826  832                  if ((cr = (*tp)->t_cred) != NULL && crgetzoneid(cr) == zoneid) {
 827  833                          tmp = *tp;
 828  834                          *tp = tmp->t_forw;
 829  835                          tmp->t_forw = list;
 830  836                          list = tmp;
 831  837                          (*countp)--;
 832  838                  } else {
 833  839                          tp = &(*tp)->t_forw;
 834  840                  }
 835  841          }
 836  842          return (list);
 837  843  }
 838  844  
 839  845  static void
 840  846  thread_reap_list(kthread_t *t)
 841  847  {
 842  848          kthread_t *next;
 843  849  
 844  850          while (t != NULL) {
 845  851                  next = t->t_forw;
 846  852                  thread_free(t);
 847  853                  t = next;
 848  854          }
 849  855  }
 850  856  
 851  857  /* ARGSUSED */
 852  858  static void
 853  859  thread_zone_destroy(zoneid_t zoneid, void *unused)
 854  860  {
 855  861          kthread_t *t, *l;
 856  862  
 857  863          mutex_enter(&reaplock);
 858  864          /*
 859  865           * Pull threads and lwps associated with zone off deathrow lists.
 860  866           */
 861  867          t = thread_zone_cleanup(&thread_deathrow, &thread_reapcnt, zoneid);
 862  868          l = thread_zone_cleanup(&lwp_deathrow, &lwp_reapcnt, zoneid);
 863  869          mutex_exit(&reaplock);
 864  870  
 865  871          /*
 866  872           * Guard against race condition in mutex_owner_running:
 867  873           *      thread=owner(mutex)
 868  874           *      <interrupt>
 869  875           *                              thread exits mutex
 870  876           *                              thread exits
 871  877           *                              thread reaped
 872  878           *                              thread struct freed
 873  879           * cpu = thread->t_cpu <- BAD POINTER DEREFERENCE.
 874  880           * A cross call to all cpus will cause the interrupt handler
 875  881           * to reset the PC if it is in mutex_owner_running, refreshing
 876  882           * stale thread pointers.
 877  883           */
 878  884          mutex_sync();   /* sync with mutex code */
 879  885  
 880  886          /*
 881  887           * Reap threads
 882  888           */
 883  889          thread_reap_list(t);
 884  890  
 885  891          /*
 886  892           * Reap lwps
 887  893           */
 888  894          thread_reap_list(l);
 889  895  }
 890  896  
 891  897  /*
 892  898   * cleanup zombie threads that are on deathrow.
 893  899   */
 894  900  void
 895  901  thread_reaper()
 896  902  {
 897  903          kthread_t *t, *l;
 898  904          callb_cpr_t cprinfo;
 899  905  
 900  906          /*
 901  907           * Register callback to clean up threads when zone is destroyed.
 902  908           */
 903  909          zone_key_create(&zone_thread_key, NULL, NULL, thread_zone_destroy);
 904  910  
 905  911          CALLB_CPR_INIT(&cprinfo, &reaplock, callb_generic_cpr, "t_reaper");
 906  912          for (;;) {
 907  913                  mutex_enter(&reaplock);
 908  914                  while (thread_deathrow == NULL && lwp_deathrow == NULL) {
 909  915                          CALLB_CPR_SAFE_BEGIN(&cprinfo);
 910  916                          cv_wait(&reaper_cv, &reaplock);
 911  917                          CALLB_CPR_SAFE_END(&cprinfo, &reaplock);
 912  918                  }
 913  919                  /*
 914  920                   * mutex_sync() needs to be called when reaping, but
 915  921                   * not too often.  We limit reaping rate to once
 916  922                   * per second.  Reaplimit is max rate at which threads can
 917  923                   * be freed. Does not impact thread destruction/creation.
 918  924                   */
 919  925                  t = thread_deathrow;
 920  926                  l = lwp_deathrow;
 921  927                  thread_deathrow = NULL;
 922  928                  lwp_deathrow = NULL;
 923  929                  thread_reapcnt = 0;
 924  930                  lwp_reapcnt = 0;
 925  931                  mutex_exit(&reaplock);
 926  932  
 927  933                  /*
 928  934                   * Guard against race condition in mutex_owner_running:
 929  935                   *      thread=owner(mutex)
 930  936                   *      <interrupt>
 931  937                   *                              thread exits mutex
 932  938                   *                              thread exits
 933  939                   *                              thread reaped
 934  940                   *                              thread struct freed
 935  941                   * cpu = thread->t_cpu <- BAD POINTER DEREFERENCE.
 936  942                   * A cross call to all cpus will cause the interrupt handler
 937  943                   * to reset the PC if it is in mutex_owner_running, refreshing
 938  944                   * stale thread pointers.
 939  945                   */
 940  946                  mutex_sync();   /* sync with mutex code */
 941  947                  /*
 942  948                   * Reap threads
 943  949                   */
 944  950                  thread_reap_list(t);
 945  951  
 946  952                  /*
 947  953                   * Reap lwps
 948  954                   */
 949  955                  thread_reap_list(l);
 950  956                  delay(hz);
 951  957          }
 952  958  }
 953  959  
 954  960  /*
 955  961   * This is called by lwpcreate, etc.() to put a lwp_deathrow thread onto
 956  962   * thread_deathrow. The thread's state is changed already TS_FREE to indicate
 957  963   * that is reapable. The thread already holds the reaplock, and was already
 958  964   * freed.
 959  965   */
 960  966  void
 961  967  reapq_move_lq_to_tq(kthread_t *t)
 962  968  {
 963  969          ASSERT(t->t_state == TS_FREE);
 964  970          ASSERT(MUTEX_HELD(&reaplock));
 965  971          t->t_forw = thread_deathrow;
 966  972          thread_deathrow = t;
 967  973          thread_reapcnt++;
 968  974          if (lwp_reapcnt + thread_reapcnt > reaplimit)
 969  975                  cv_signal(&reaper_cv);  /* wake the reaper */
 970  976  }
 971  977  
 972  978  /*
 973  979   * This is called by resume() to put a zombie thread onto deathrow.
 974  980   * The thread's state is changed to TS_FREE to indicate that is reapable.
 975  981   * This is called from the idle thread so it must not block - just spin.
 976  982   */
 977  983  void
 978  984  reapq_add(kthread_t *t)
 979  985  {
 980  986          mutex_enter(&reaplock);
 981  987  
 982  988          /*
 983  989           * lwp_deathrow contains threads with lwp linkage and
 984  990           * swappable thread stacks which have the default stacksize.
 985  991           * These threads' lwps and stacks may be reused by lwp_create().
 986  992           *
 987  993           * Anything else goes on thread_deathrow(), where it will eventually
 988  994           * be thread_free()d.
 989  995           */
 990  996          if (t->t_flag & T_LWPREUSE) {
 991  997                  ASSERT(ttolwp(t) != NULL);
 992  998                  t->t_forw = lwp_deathrow;
 993  999                  lwp_deathrow = t;
 994 1000                  lwp_reapcnt++;
 995 1001          } else {
 996 1002                  t->t_forw = thread_deathrow;
 997 1003                  thread_deathrow = t;
 998 1004                  thread_reapcnt++;
 999 1005          }
1000 1006          if (lwp_reapcnt + thread_reapcnt > reaplimit)
1001 1007                  cv_signal(&reaper_cv);  /* wake the reaper */
1002 1008          t->t_state = TS_FREE;
1003 1009          lock_clear(&t->t_lock);
1004 1010  
1005 1011          /*
1006 1012           * Before we return, we need to grab and drop the thread lock for
1007 1013           * the dead thread.  At this point, the current thread is the idle
1008 1014           * thread, and the dead thread's CPU lock points to the current
1009 1015           * CPU -- and we must grab and drop the lock to synchronize with
1010 1016           * a racing thread walking a blocking chain that the zombie thread
1011 1017           * was recently in.  By this point, that blocking chain is (by
1012 1018           * definition) stale:  the dead thread is not holding any locks, and
1013 1019           * is therefore not in any blocking chains -- but if we do not regrab
1014 1020           * our lock before freeing the dead thread's data structures, the
1015 1021           * thread walking the (stale) blocking chain will die on memory
1016 1022           * corruption when it attempts to drop the dead thread's lock.  We
1017 1023           * only need do this once because there is no way for the dead thread
1018 1024           * to ever again be on a blocking chain:  once we have grabbed and
1019 1025           * dropped the thread lock, we are guaranteed that anyone that could
1020 1026           * have seen this thread in a blocking chain can no longer see it.
1021 1027           */
1022 1028          thread_lock(t);
1023 1029          thread_unlock(t);
1024 1030  
1025 1031          mutex_exit(&reaplock);
1026 1032  }
1027 1033  
1028 1034  /*
1029 1035   * Install thread context ops for the current thread.
1030 1036   */
1031 1037  void
1032 1038  installctx(
1033 1039          kthread_t *t,
1034 1040          void    *arg,
1035 1041          void    (*save)(void *),
1036 1042          void    (*restore)(void *),
1037 1043          void    (*fork)(void *, void *),
1038 1044          void    (*lwp_create)(void *, void *),
1039 1045          void    (*exit)(void *),
1040 1046          void    (*free)(void *, int))
1041 1047  {
1042 1048          struct ctxop *ctx;
1043 1049  
1044 1050          ctx = kmem_alloc(sizeof (struct ctxop), KM_SLEEP);
1045 1051          ctx->save_op = save;
1046 1052          ctx->restore_op = restore;
1047 1053          ctx->fork_op = fork;
1048 1054          ctx->lwp_create_op = lwp_create;
1049 1055          ctx->exit_op = exit;
1050 1056          ctx->free_op = free;
1051 1057          ctx->arg = arg;
1052 1058          ctx->next = t->t_ctx;
1053 1059          t->t_ctx = ctx;
1054 1060  }
1055 1061  
1056 1062  /*
1057 1063   * Remove the thread context ops from a thread.
1058 1064   */
1059 1065  int
1060 1066  removectx(
1061 1067          kthread_t *t,
1062 1068          void    *arg,
1063 1069          void    (*save)(void *),
1064 1070          void    (*restore)(void *),
1065 1071          void    (*fork)(void *, void *),
1066 1072          void    (*lwp_create)(void *, void *),
1067 1073          void    (*exit)(void *),
1068 1074          void    (*free)(void *, int))
1069 1075  {
1070 1076          struct ctxop *ctx, *prev_ctx;
1071 1077  
1072 1078          /*
1073 1079           * The incoming kthread_t (which is the thread for which the
1074 1080           * context ops will be removed) should be one of the following:
1075 1081           *
1076 1082           * a) the current thread,
1077 1083           *
1078 1084           * b) a thread of a process that's being forked (SIDL),
1079 1085           *
1080 1086           * c) a thread that belongs to the same process as the current
1081 1087           *    thread and for which the current thread is the agent thread,
1082 1088           *
1083 1089           * d) a thread that is TS_STOPPED which is indicative of it
1084 1090           *    being (if curthread is not an agent) a thread being created
1085 1091           *    as part of an lwp creation.
1086 1092           */
1087 1093          ASSERT(t == curthread || ttoproc(t)->p_stat == SIDL ||
1088 1094              ttoproc(t)->p_agenttp == curthread || t->t_state == TS_STOPPED);
1089 1095  
1090 1096          /*
1091 1097           * Serialize modifications to t->t_ctx to prevent the agent thread
1092 1098           * and the target thread from racing with each other during lwp exit.
1093 1099           */
1094 1100          mutex_enter(&t->t_ctx_lock);
1095 1101          prev_ctx = NULL;
1096 1102          kpreempt_disable();
1097 1103          for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next) {
1098 1104                  if (ctx->save_op == save && ctx->restore_op == restore &&
1099 1105                      ctx->fork_op == fork && ctx->lwp_create_op == lwp_create &&
1100 1106                      ctx->exit_op == exit && ctx->free_op == free &&
1101 1107                      ctx->arg == arg) {
1102 1108                          if (prev_ctx)
1103 1109                                  prev_ctx->next = ctx->next;
1104 1110                          else
1105 1111                                  t->t_ctx = ctx->next;
1106 1112                          mutex_exit(&t->t_ctx_lock);
1107 1113                          if (ctx->free_op != NULL)
1108 1114                                  (ctx->free_op)(ctx->arg, 0);
1109 1115                          kmem_free(ctx, sizeof (struct ctxop));
1110 1116                          kpreempt_enable();
1111 1117                          return (1);
1112 1118                  }
1113 1119                  prev_ctx = ctx;
1114 1120          }
1115 1121          mutex_exit(&t->t_ctx_lock);
1116 1122          kpreempt_enable();
1117 1123  
1118 1124          return (0);
1119 1125  }
1120 1126  
1121 1127  void
1122 1128  savectx(kthread_t *t)
1123 1129  {
1124 1130          struct ctxop *ctx;
1125 1131  
1126 1132          ASSERT(t == curthread);
1127 1133          for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
1128 1134                  if (ctx->save_op != NULL)
1129 1135                          (ctx->save_op)(ctx->arg);
1130 1136  }
1131 1137  
1132 1138  void
1133 1139  restorectx(kthread_t *t)
1134 1140  {
1135 1141          struct ctxop *ctx;
1136 1142  
1137 1143          ASSERT(t == curthread);
1138 1144          for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
1139 1145                  if (ctx->restore_op != NULL)
1140 1146                          (ctx->restore_op)(ctx->arg);
1141 1147  }
1142 1148  
1143 1149  void
1144 1150  forkctx(kthread_t *t, kthread_t *ct)
1145 1151  {
1146 1152          struct ctxop *ctx;
1147 1153  
1148 1154          for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
1149 1155                  if (ctx->fork_op != NULL)
1150 1156                          (ctx->fork_op)(t, ct);
1151 1157  }
1152 1158  
1153 1159  /*
1154 1160   * Note that this operator is only invoked via the _lwp_create
1155 1161   * system call.  The system may have other reasons to create lwps
1156 1162   * e.g. the agent lwp or the doors unreferenced lwp.
1157 1163   */
1158 1164  void
1159 1165  lwp_createctx(kthread_t *t, kthread_t *ct)
1160 1166  {
1161 1167          struct ctxop *ctx;
1162 1168  
1163 1169          for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
1164 1170                  if (ctx->lwp_create_op != NULL)
1165 1171                          (ctx->lwp_create_op)(t, ct);
1166 1172  }
1167 1173  
1168 1174  /*
1169 1175   * exitctx is called from thread_exit() and lwp_exit() to perform any actions
1170 1176   * needed when the thread/LWP leaves the processor for the last time. This
1171 1177   * routine is not intended to deal with freeing memory; freectx() is used for
1172 1178   * that purpose during thread_free(). This routine is provided to allow for
1173 1179   * clean-up that can't wait until thread_free().
1174 1180   */
1175 1181  void
1176 1182  exitctx(kthread_t *t)
1177 1183  {
1178 1184          struct ctxop *ctx;
1179 1185  
1180 1186          for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
1181 1187                  if (ctx->exit_op != NULL)
1182 1188                          (ctx->exit_op)(t);
1183 1189  }
1184 1190  
1185 1191  /*
1186 1192   * freectx is called from thread_free() and exec() to get
1187 1193   * rid of old thread context ops.
1188 1194   */
1189 1195  void
1190 1196  freectx(kthread_t *t, int isexec)
1191 1197  {
1192 1198          struct ctxop *ctx;
1193 1199  
1194 1200          kpreempt_disable();
1195 1201          while ((ctx = t->t_ctx) != NULL) {
1196 1202                  t->t_ctx = ctx->next;
1197 1203                  if (ctx->free_op != NULL)
1198 1204                          (ctx->free_op)(ctx->arg, isexec);
1199 1205                  kmem_free(ctx, sizeof (struct ctxop));
1200 1206          }
1201 1207          kpreempt_enable();
1202 1208  }
1203 1209  
1204 1210  /*
1205 1211   * freectx_ctx is called from lwp_create() when lwp is reused from
1206 1212   * lwp_deathrow and its thread structure is added to thread_deathrow.
1207 1213   * The thread structure to which this ctx was attached may be already
1208 1214   * freed by the thread reaper so free_op implementations shouldn't rely
1209 1215   * on thread structure to which this ctx was attached still being around.
1210 1216   */
1211 1217  void
1212 1218  freectx_ctx(struct ctxop *ctx)
1213 1219  {
1214 1220          struct ctxop *nctx;
1215 1221  
1216 1222          ASSERT(ctx != NULL);
1217 1223  
1218 1224          kpreempt_disable();
1219 1225          do {
1220 1226                  nctx = ctx->next;
1221 1227                  if (ctx->free_op != NULL)
1222 1228                          (ctx->free_op)(ctx->arg, 0);
1223 1229                  kmem_free(ctx, sizeof (struct ctxop));
1224 1230          } while ((ctx = nctx) != NULL);
1225 1231          kpreempt_enable();
1226 1232  }
1227 1233  
1228 1234  /*
1229 1235   * Set the thread running; arrange for it to be swapped in if necessary.
1230 1236   */
1231 1237  void
1232 1238  setrun_locked(kthread_t *t)
1233 1239  {
1234 1240          ASSERT(THREAD_LOCK_HELD(t));
1235 1241          if (t->t_state == TS_SLEEP) {
1236 1242                  /*
1237 1243                   * Take off sleep queue.
1238 1244                   */
1239 1245                  SOBJ_UNSLEEP(t->t_sobj_ops, t);
1240 1246          } else if (t->t_state & (TS_RUN | TS_ONPROC)) {
1241 1247                  /*
1242 1248                   * Already on dispatcher queue.
1243 1249                   */
1244 1250                  return;
1245 1251          } else if (t->t_state == TS_WAIT) {
1246 1252                  waitq_setrun(t);
1247 1253          } else if (t->t_state == TS_STOPPED) {
1248 1254                  /*
1249 1255                   * All of the sending of SIGCONT (TC_XSTART) and /proc
1250 1256                   * (TC_PSTART) and lwp_continue() (TC_CSTART) must have
1251 1257                   * requested that the thread be run.
1252 1258                   * Just calling setrun() is not sufficient to set a stopped
1253 1259                   * thread running.  TP_TXSTART is always set if the thread
1254 1260                   * is not stopped by a jobcontrol stop signal.
1255 1261                   * TP_TPSTART is always set if /proc is not controlling it.
1256 1262                   * TP_TCSTART is always set if lwp_suspend() didn't stop it.
1257 1263                   * The thread won't be stopped unless one of these
1258 1264                   * three mechanisms did it.
1259 1265                   *
1260 1266                   * These flags must be set before calling setrun_locked(t).
1261 1267                   * They can't be passed as arguments because the streams
1262 1268                   * code calls setrun() indirectly and the mechanism for
1263 1269                   * doing so admits only one argument.  Note that the
1264 1270                   * thread must be locked in order to change t_schedflags.
1265 1271                   */
1266 1272                  if ((t->t_schedflag & TS_ALLSTART) != TS_ALLSTART)
1267 1273                          return;
1268 1274                  /*
1269 1275                   * Process is no longer stopped (a thread is running).
1270 1276                   */
1271 1277                  t->t_whystop = 0;
1272 1278                  t->t_whatstop = 0;
1273 1279                  /*
1274 1280                   * Strictly speaking, we do not have to clear these
1275 1281                   * flags here; they are cleared on entry to stop().
1276 1282                   * However, they are confusing when doing kernel
1277 1283                   * debugging or when they are revealed by ps(1).
1278 1284                   */
1279 1285                  t->t_schedflag &= ~TS_ALLSTART;
1280 1286                  THREAD_TRANSITION(t);   /* drop stopped-thread lock */
1281 1287                  ASSERT(t->t_lockp == &transition_lock);
1282 1288                  ASSERT(t->t_wchan0 == NULL && t->t_wchan == NULL);
1283 1289                  /*
1284 1290                   * Let the class put the process on the dispatcher queue.
1285 1291                   */
1286 1292                  CL_SETRUN(t);
1287 1293          }
1288 1294  }
1289 1295  
1290 1296  void
1291 1297  setrun(kthread_t *t)
1292 1298  {
1293 1299          thread_lock(t);
1294 1300          setrun_locked(t);
1295 1301          thread_unlock(t);
1296 1302  }
1297 1303  
1298 1304  /*
1299 1305   * Unpin an interrupted thread.
1300 1306   *      When an interrupt occurs, the interrupt is handled on the stack
1301 1307   *      of an interrupt thread, taken from a pool linked to the CPU structure.
1302 1308   *
1303 1309   *      When swtch() is switching away from an interrupt thread because it
1304 1310   *      blocked or was preempted, this routine is called to complete the
1305 1311   *      saving of the interrupted thread state, and returns the interrupted
1306 1312   *      thread pointer so it may be resumed.
1307 1313   *
1308 1314   *      Called by swtch() only at high spl.
1309 1315   */
1310 1316  kthread_t *
1311 1317  thread_unpin()
1312 1318  {
1313 1319          kthread_t       *t = curthread; /* current thread */
1314 1320          kthread_t       *itp;           /* interrupted thread */
1315 1321          int             i;              /* interrupt level */
1316 1322          extern int      intr_passivate();
1317 1323  
1318 1324          ASSERT(t->t_intr != NULL);
1319 1325  
1320 1326          itp = t->t_intr;                /* interrupted thread */
1321 1327          t->t_intr = NULL;               /* clear interrupt ptr */
1322 1328  
1323 1329          /*
1324 1330           * Get state from interrupt thread for the one
1325 1331           * it interrupted.
1326 1332           */
1327 1333  
1328 1334          i = intr_passivate(t, itp);
1329 1335  
1330 1336          TRACE_5(TR_FAC_INTR, TR_INTR_PASSIVATE,
1331 1337              "intr_passivate:level %d curthread %p (%T) ithread %p (%T)",
1332 1338              i, t, t, itp, itp);
1333 1339  
1334 1340          /*
1335 1341           * Dissociate the current thread from the interrupted thread's LWP.
1336 1342           */
1337 1343          t->t_lwp = NULL;
1338 1344  
1339 1345          /*
1340 1346           * Interrupt handlers above the level that spinlocks block must
1341 1347           * not block.
1342 1348           */
1343 1349  #if DEBUG
1344 1350          if (i < 0 || i > LOCK_LEVEL)
1345 1351                  cmn_err(CE_PANIC, "thread_unpin: ipl out of range %x", i);
1346 1352  #endif
1347 1353  
1348 1354          /*
1349 1355           * Compute the CPU's base interrupt level based on the active
1350 1356           * interrupts.
1351 1357           */
1352 1358          ASSERT(CPU->cpu_intr_actv & (1 << i));
1353 1359          set_base_spl();
1354 1360  
1355 1361          return (itp);
1356 1362  }
1357 1363  
1358 1364  /*
1359 1365   * Create and initialize an interrupt thread.
1360 1366   *      Returns non-zero on error.
1361 1367   *      Called at spl7() or better.
1362 1368   */
1363 1369  void
1364 1370  thread_create_intr(struct cpu *cp)
1365 1371  {
1366 1372          kthread_t *tp;
1367 1373  
1368 1374          tp = thread_create(NULL, 0,
1369 1375              (void (*)())thread_create_intr, NULL, 0, &p0, TS_ONPROC, 0);
1370 1376  
1371 1377          /*
1372 1378           * Set the thread in the TS_FREE state.  The state will change
1373 1379           * to TS_ONPROC only while the interrupt is active.  Think of these
1374 1380           * as being on a private free list for the CPU.  Being TS_FREE keeps
1375 1381           * inactive interrupt threads out of debugger thread lists.
1376 1382           *
1377 1383           * We cannot call thread_create with TS_FREE because of the current
1378 1384           * checks there for ONPROC.  Fix this when thread_create takes flags.
1379 1385           */
1380 1386          THREAD_FREEINTR(tp, cp);
1381 1387  
1382 1388          /*
1383 1389           * Nobody should ever reference the credentials of an interrupt
1384 1390           * thread so make it NULL to catch any such references.
1385 1391           */
1386 1392          tp->t_cred = NULL;
1387 1393          tp->t_flag |= T_INTR_THREAD;
1388 1394          tp->t_cpu = cp;
1389 1395          tp->t_bound_cpu = cp;
1390 1396          tp->t_disp_queue = cp->cpu_disp;
1391 1397          tp->t_affinitycnt = 1;
1392 1398          tp->t_preempt = 1;
1393 1399  
1394 1400          /*
1395 1401           * Don't make a user-requested binding on this thread so that
1396 1402           * the processor can be offlined.
1397 1403           */
1398 1404          tp->t_bind_cpu = PBIND_NONE;    /* no USER-requested binding */
1399 1405          tp->t_bind_pset = PS_NONE;
1400 1406  
1401 1407  #if defined(__i386) || defined(__amd64)
1402 1408          tp->t_stk -= STACK_ALIGN;
1403 1409          *(tp->t_stk) = 0;               /* terminate intr thread stack */
1404 1410  #endif
1405 1411  
1406 1412          /*
1407 1413           * Link onto CPU's interrupt pool.
1408 1414           */
1409 1415          tp->t_link = cp->cpu_intr_thread;
1410 1416          cp->cpu_intr_thread = tp;
1411 1417  }
1412 1418  
1413 1419  /*
1414 1420   * TSD -- THREAD SPECIFIC DATA
1415 1421   */
1416 1422  static kmutex_t         tsd_mutex;       /* guards TSD keys, destructors, tsd_list */
1417 1423  static uint_t           tsd_nkeys;       /* size of destructor array */
1418 1424  /* per-key destructor funcs; a NULL slot means the key is unused */
1419 1425  static void             (**tsd_destructor)(void *);
1420 1426  /* list of tsd_thread's */
1421 1427  static struct tsd_thread        *tsd_list;
1422 1428  
/*
 * Default destructor: does nothing.
 *	It exists because a NULL entry in the destructor array means that
 *	the key is unused, so keys created without a destructor need a
 *	non-NULL placeholder.
 */
/* ARGSUSED */
void
tsd_defaultdestructor(void *value)
{
}
1431 1437  
1432 1438  /*
1433 1439   * Create a key (index into per thread array)
1434 1440   *      Locks out tsd_create, tsd_destroy, and tsd_exit
1435 1441   *      May allocate memory with lock held
1436 1442   */
1437 1443  void
1438 1444  tsd_create(uint_t *keyp, void (*destructor)(void *))
1439 1445  {
1440 1446          int     i;
1441 1447          uint_t  nkeys;
1442 1448  
1443 1449          /*
1444 1450           * if key is allocated, do nothing
1445 1451           */
1446 1452          mutex_enter(&tsd_mutex);
1447 1453          if (*keyp) {
1448 1454                  mutex_exit(&tsd_mutex);
1449 1455                  return;
1450 1456          }
1451 1457          /*
1452 1458           * find an unused key
1453 1459           */
1454 1460          if (destructor == NULL)
1455 1461                  destructor = tsd_defaultdestructor;
1456 1462  
1457 1463          for (i = 0; i < tsd_nkeys; ++i)
1458 1464                  if (tsd_destructor[i] == NULL)
1459 1465                          break;
1460 1466  
1461 1467          /*
1462 1468           * if no unused keys, increase the size of the destructor array
1463 1469           */
1464 1470          if (i == tsd_nkeys) {
1465 1471                  if ((nkeys = (tsd_nkeys << 1)) == 0)
1466 1472                          nkeys = 1;
1467 1473                  tsd_destructor =
1468 1474                      (void (**)(void *))tsd_realloc((void *)tsd_destructor,
1469 1475                      (size_t)(tsd_nkeys * sizeof (void (*)(void *))),
1470 1476                      (size_t)(nkeys * sizeof (void (*)(void *))));
1471 1477                  tsd_nkeys = nkeys;
1472 1478          }
1473 1479  
1474 1480          /*
1475 1481           * allocate the next available unused key
1476 1482           */
1477 1483          tsd_destructor[i] = destructor;
1478 1484          *keyp = i + 1;
1479 1485          mutex_exit(&tsd_mutex);
1480 1486  }
1481 1487  
1482 1488  /*
1483 1489   * Destroy a key -- this is for unloadable modules
1484 1490   *
1485 1491   * Assumes that the caller is preventing tsd_set and tsd_get
1486 1492   * Locks out tsd_create, tsd_destroy, and tsd_exit
1487 1493   * May free memory with lock held
1488 1494   */
1489 1495  void
1490 1496  tsd_destroy(uint_t *keyp)
1491 1497  {
1492 1498          uint_t key;
1493 1499          struct tsd_thread *tsd;
1494 1500  
1495 1501          /*
1496 1502           * protect the key namespace and our destructor lists
1497 1503           */
1498 1504          mutex_enter(&tsd_mutex);
1499 1505          key = *keyp;
1500 1506          *keyp = 0;
1501 1507  
1502 1508          ASSERT(key <= tsd_nkeys);
1503 1509  
1504 1510          /*
1505 1511           * if the key is valid
1506 1512           */
1507 1513          if (key != 0) {
1508 1514                  uint_t k = key - 1;
1509 1515                  /*
1510 1516                   * for every thread with TSD, call key's destructor
1511 1517                   */
1512 1518                  for (tsd = tsd_list; tsd; tsd = tsd->ts_next) {
1513 1519                          /*
1514 1520                           * no TSD for key in this thread
1515 1521                           */
1516 1522                          if (key > tsd->ts_nkeys)
1517 1523                                  continue;
1518 1524                          /*
1519 1525                           * call destructor for key
1520 1526                           */
1521 1527                          if (tsd->ts_value[k] && tsd_destructor[k])
1522 1528                                  (*tsd_destructor[k])(tsd->ts_value[k]);
1523 1529                          /*
1524 1530                           * reset value for key
1525 1531                           */
1526 1532                          tsd->ts_value[k] = NULL;
1527 1533                  }
1528 1534                  /*
1529 1535                   * actually free the key (NULL destructor == unused)
1530 1536                   */
1531 1537                  tsd_destructor[k] = NULL;
1532 1538          }
1533 1539  
1534 1540          mutex_exit(&tsd_mutex);
1535 1541  }
1536 1542  
1537 1543  /*
1538 1544   * Quickly return the per thread value that was stored with the specified key
1539 1545   * Assumes the caller is protecting key from tsd_create and tsd_destroy
1540 1546   */
1541 1547  void *
1542 1548  tsd_get(uint_t key)
1543 1549  {
1544 1550          return (tsd_agent_get(curthread, key));
1545 1551  }
1546 1552  
1547 1553  /*
1548 1554   * Set a per thread value indexed with the specified key
1549 1555   */
1550 1556  int
1551 1557  tsd_set(uint_t key, void *value)
1552 1558  {
1553 1559          return (tsd_agent_set(curthread, key, value));
1554 1560  }
1555 1561  
1556 1562  /*
1557 1563   * Like tsd_get(), except that the agent lwp can get the tsd of
1558 1564   * another thread in the same process (the agent thread only runs when the
1559 1565   * process is completely stopped by /proc), or syslwp is creating a new lwp.
1560 1566   */
1561 1567  void *
1562 1568  tsd_agent_get(kthread_t *t, uint_t key)
1563 1569  {
1564 1570          struct tsd_thread *tsd = t->t_tsd;
1565 1571  
1566 1572          ASSERT(t == curthread ||
1567 1573              ttoproc(t)->p_agenttp == curthread || t->t_state == TS_STOPPED);
1568 1574  
1569 1575          if (key && tsd != NULL && key <= tsd->ts_nkeys)
1570 1576                  return (tsd->ts_value[key - 1]);
1571 1577          return (NULL);
1572 1578  }
1573 1579  
1574 1580  /*
1575 1581   * Like tsd_set(), except that the agent lwp can set the tsd of
1576 1582   * another thread in the same process, or syslwp can set the tsd
1577 1583   * of a thread it's in the middle of creating.
1578 1584   *
1579 1585   * Assumes the caller is protecting key from tsd_create and tsd_destroy
1580 1586   * May lock out tsd_destroy (and tsd_create), may allocate memory with
1581 1587   * lock held
1582 1588   */
1583 1589  int
1584 1590  tsd_agent_set(kthread_t *t, uint_t key, void *value)
1585 1591  {
1586 1592          struct tsd_thread *tsd = t->t_tsd;
1587 1593  
1588 1594          ASSERT(t == curthread ||
1589 1595              ttoproc(t)->p_agenttp == curthread || t->t_state == TS_STOPPED);
1590 1596  
1591 1597          if (key == 0)
1592 1598                  return (EINVAL);
1593 1599          if (tsd == NULL)
1594 1600                  tsd = t->t_tsd = kmem_zalloc(sizeof (*tsd), KM_SLEEP);
1595 1601          if (key <= tsd->ts_nkeys) {
1596 1602                  tsd->ts_value[key - 1] = value;
1597 1603                  return (0);
1598 1604          }
1599 1605  
1600 1606          ASSERT(key <= tsd_nkeys);
1601 1607  
1602 1608          /*
1603 1609           * lock out tsd_destroy()
1604 1610           */
1605 1611          mutex_enter(&tsd_mutex);
1606 1612          if (tsd->ts_nkeys == 0) {
1607 1613                  /*
1608 1614                   * Link onto list of threads with TSD
1609 1615                   */
1610 1616                  if ((tsd->ts_next = tsd_list) != NULL)
1611 1617                          tsd_list->ts_prev = tsd;
1612 1618                  tsd_list = tsd;
1613 1619          }
1614 1620  
1615 1621          /*
1616 1622           * Allocate thread local storage and set the value for key
1617 1623           */
1618 1624          tsd->ts_value = tsd_realloc(tsd->ts_value,
1619 1625              tsd->ts_nkeys * sizeof (void *),
1620 1626              key * sizeof (void *));
1621 1627          tsd->ts_nkeys = key;
1622 1628          tsd->ts_value[key - 1] = value;
1623 1629          mutex_exit(&tsd_mutex);
1624 1630  
1625 1631          return (0);
1626 1632  }
1627 1633  
1628 1634  
1629 1635  /*
1630 1636   * Return the per thread value that was stored with the specified key
1631 1637   *      If necessary, create the key and the value
1632 1638   *      Assumes the caller is protecting *keyp from tsd_destroy
1633 1639   */
1634 1640  void *
1635 1641  tsd_getcreate(uint_t *keyp, void (*destroy)(void *), void *(*allocate)(void))
1636 1642  {
1637 1643          void *value;
1638 1644          uint_t key = *keyp;
1639 1645          struct tsd_thread *tsd = curthread->t_tsd;
1640 1646  
1641 1647          if (tsd == NULL)
1642 1648                  tsd = curthread->t_tsd = kmem_zalloc(sizeof (*tsd), KM_SLEEP);
1643 1649          if (key && key <= tsd->ts_nkeys && (value = tsd->ts_value[key - 1]))
1644 1650                  return (value);
1645 1651          if (key == 0)
1646 1652                  tsd_create(keyp, destroy);
1647 1653          (void) tsd_set(*keyp, value = (*allocate)());
1648 1654  
1649 1655          return (value);
1650 1656  }
1651 1657  
1652 1658  /*
1653 1659   * Called from thread_exit() to run the destructor function for each tsd
1654 1660   *      Locks out tsd_create and tsd_destroy
1655 1661   *      Assumes that the destructor *DOES NOT* use tsd
1656 1662   */
1657 1663  void
1658 1664  tsd_exit(void)
1659 1665  {
1660 1666          int i;
1661 1667          struct tsd_thread *tsd = curthread->t_tsd;
1662 1668  
1663 1669          if (tsd == NULL)
1664 1670                  return;
1665 1671  
1666 1672          if (tsd->ts_nkeys == 0) {
1667 1673                  kmem_free(tsd, sizeof (*tsd));
1668 1674                  curthread->t_tsd = NULL;
1669 1675                  return;
1670 1676          }
1671 1677  
1672 1678          /*
1673 1679           * lock out tsd_create and tsd_destroy, call
1674 1680           * the destructor, and mark the value as destroyed.
1675 1681           */
1676 1682          mutex_enter(&tsd_mutex);
1677 1683  
1678 1684          for (i = 0; i < tsd->ts_nkeys; i++) {
1679 1685                  if (tsd->ts_value[i] && tsd_destructor[i])
1680 1686                          (*tsd_destructor[i])(tsd->ts_value[i]);
1681 1687                  tsd->ts_value[i] = NULL;
1682 1688          }
1683 1689  
1684 1690          /*
1685 1691           * remove from linked list of threads with TSD
1686 1692           */
1687 1693          if (tsd->ts_next)
1688 1694                  tsd->ts_next->ts_prev = tsd->ts_prev;
1689 1695          if (tsd->ts_prev)
1690 1696                  tsd->ts_prev->ts_next = tsd->ts_next;
1691 1697          if (tsd_list == tsd)
1692 1698                  tsd_list = tsd->ts_next;
1693 1699  
1694 1700          mutex_exit(&tsd_mutex);
1695 1701  
1696 1702          /*
1697 1703           * free up the TSD
1698 1704           */
1699 1705          kmem_free(tsd->ts_value, tsd->ts_nkeys * sizeof (void *));
1700 1706          kmem_free(tsd, sizeof (struct tsd_thread));
1701 1707          curthread->t_tsd = NULL;
1702 1708  }
1703 1709  
1704 1710  /*
1705 1711   * realloc
1706 1712   */
1707 1713  static void *
1708 1714  tsd_realloc(void *old, size_t osize, size_t nsize)
1709 1715  {
1710 1716          void *new;
1711 1717  
1712 1718          new = kmem_zalloc(nsize, KM_SLEEP);
1713 1719          if (old) {
1714 1720                  bcopy(old, new, osize);
1715 1721                  kmem_free(old, osize);
1716 1722          }
1717 1723          return (new);
1718 1724  }
1719 1725  
1720 1726  /*
1721 1727   * Return non-zero if an interrupt is being serviced.
1722 1728   */
1723 1729  int
1724 1730  servicing_interrupt()
1725 1731  {
1726 1732          int onintr = 0;
1727 1733  
1728 1734          /* Are we an interrupt thread */
1729 1735          if (curthread->t_flag & T_INTR_THREAD)
1730 1736                  return (1);
1731 1737          /* Are we servicing a high level interrupt? */
1732 1738          if (CPU_ON_INTR(CPU)) {
1733 1739                  kpreempt_disable();
1734 1740                  onintr = CPU_ON_INTR(CPU);
1735 1741                  kpreempt_enable();
1736 1742          }
1737 1743          return (onintr);
1738 1744  }
1739 1745  
1740 1746  
1741 1747  /*
1742 1748   * Change the dispatch priority of a thread in the system.
1743 1749   * Used when raising or lowering a thread's priority.
1744 1750   * (E.g., priority inheritance)
1745 1751   *
1746 1752   * Since threads are queued according to their priority, we
1747 1753   * we must check the thread's state to determine whether it
1748 1754   * is on a queue somewhere. If it is, we've got to:
1749 1755   *
1750 1756   *      o Dequeue the thread.
1751 1757   *      o Change its effective priority.
1752 1758   *      o Enqueue the thread.
1753 1759   *
1754 1760   * Assumptions: The thread whose priority we wish to change
1755 1761   * must be locked before we call thread_change_(e)pri().
1756 1762   * The thread_change(e)pri() function doesn't drop the thread
1757 1763   * lock--that must be done by its caller.
1758 1764   */
1759 1765  void
1760 1766  thread_change_epri(kthread_t *t, pri_t disp_pri)
1761 1767  {
1762 1768          uint_t  state;
1763 1769  
1764 1770          ASSERT(THREAD_LOCK_HELD(t));
1765 1771  
1766 1772          /*
1767 1773           * If the inherited priority hasn't actually changed,
1768 1774           * just return.
1769 1775           */
1770 1776          if (t->t_epri == disp_pri)
1771 1777                  return;
1772 1778  
1773 1779          state = t->t_state;
1774 1780  
1775 1781          /*
1776 1782           * If it's not on a queue, change the priority with impunity.
1777 1783           */
1778 1784          if ((state & (TS_SLEEP | TS_RUN | TS_WAIT)) == 0) {
1779 1785                  t->t_epri = disp_pri;
1780 1786                  if (state == TS_ONPROC) {
1781 1787                          cpu_t *cp = t->t_disp_queue->disp_cpu;
1782 1788  
1783 1789                          if (t == cp->cpu_dispthread)
1784 1790                                  cp->cpu_dispatch_pri = DISP_PRIO(t);
1785 1791                  }
1786 1792          } else if (state == TS_SLEEP) {
1787 1793                  /*
1788 1794                   * Take the thread out of its sleep queue.
1789 1795                   * Change the inherited priority.
1790 1796                   * Re-enqueue the thread.
1791 1797                   * Each synchronization object exports a function
1792 1798                   * to do this in an appropriate manner.
1793 1799                   */
1794 1800                  SOBJ_CHANGE_EPRI(t->t_sobj_ops, t, disp_pri);
1795 1801          } else if (state == TS_WAIT) {
1796 1802                  /*
1797 1803                   * Re-enqueue a thread on the wait queue if its
1798 1804                   * effective priority needs to change.
1799 1805                   */
1800 1806                  if (disp_pri != t->t_epri)
1801 1807                          waitq_change_pri(t, disp_pri);
1802 1808          } else {
1803 1809                  /*
1804 1810                   * The thread is on a run queue.
1805 1811                   * Note: setbackdq() may not put the thread
1806 1812                   * back on the same run queue where it originally
1807 1813                   * resided.
1808 1814                   */
1809 1815                  (void) dispdeq(t);
1810 1816                  t->t_epri = disp_pri;
1811 1817                  setbackdq(t);
1812 1818          }
1813 1819          schedctl_set_cidpri(t);
1814 1820  }
1815 1821  
1816 1822  /*
1817 1823   * Function: Change the t_pri field of a thread.
1818 1824   * Side Effects: Adjust the thread ordering on a run queue
1819 1825   *               or sleep queue, if necessary.
1820 1826   * Returns: 1 if the thread was on a run queue, else 0.
1821 1827   */
1822 1828  int
1823 1829  thread_change_pri(kthread_t *t, pri_t disp_pri, int front)
1824 1830  {
1825 1831          uint_t  state;
1826 1832          int     on_rq = 0;
1827 1833  
1828 1834          ASSERT(THREAD_LOCK_HELD(t));
1829 1835  
1830 1836          state = t->t_state;
1831 1837          THREAD_WILLCHANGE_PRI(t, disp_pri);
1832 1838  
1833 1839          /*
1834 1840           * If it's not on a queue, change the priority with impunity.
1835 1841           */
1836 1842          if ((state & (TS_SLEEP | TS_RUN | TS_WAIT)) == 0) {
1837 1843                  t->t_pri = disp_pri;
1838 1844  
1839 1845                  if (state == TS_ONPROC) {
1840 1846                          cpu_t *cp = t->t_disp_queue->disp_cpu;
1841 1847  
1842 1848                          if (t == cp->cpu_dispthread)
1843 1849                                  cp->cpu_dispatch_pri = DISP_PRIO(t);
1844 1850                  }
1845 1851          } else if (state == TS_SLEEP) {
1846 1852                  /*
1847 1853                   * If the priority has changed, take the thread out of
1848 1854                   * its sleep queue and change the priority.
1849 1855                   * Re-enqueue the thread.
1850 1856                   * Each synchronization object exports a function
1851 1857                   * to do this in an appropriate manner.
1852 1858                   */
1853 1859                  if (disp_pri != t->t_pri)
1854 1860                          SOBJ_CHANGE_PRI(t->t_sobj_ops, t, disp_pri);
1855 1861          } else if (state == TS_WAIT) {
1856 1862                  /*
1857 1863                   * Re-enqueue a thread on the wait queue if its
1858 1864                   * priority needs to change.
1859 1865                   */
1860 1866                  if (disp_pri != t->t_pri)
1861 1867                          waitq_change_pri(t, disp_pri);
1862 1868          } else {
1863 1869                  /*
1864 1870                   * The thread is on a run queue.
1865 1871                   * Note: setbackdq() may not put the thread
1866 1872                   * back on the same run queue where it originally
1867 1873                   * resided.
1868 1874                   *
1869 1875                   * We still requeue the thread even if the priority
1870 1876                   * is unchanged to preserve round-robin (and other)
1871 1877                   * effects between threads of the same priority.
1872 1878                   */
1873 1879                  on_rq = dispdeq(t);
1874 1880                  ASSERT(on_rq);
1875 1881                  t->t_pri = disp_pri;
1876 1882                  if (front) {
1877 1883                          setfrontdq(t);
1878 1884                  } else {
1879 1885                          setbackdq(t);
1880 1886                  }
1881 1887          }
1882 1888          schedctl_set_cidpri(t);
1883 1889          return (on_rq);
1884 1890  }
1885 1891  
1886 1892  /*
1887 1893   * Tunable kmem_stackinfo is set, fill the kernel thread stack with a
1888 1894   * specific pattern.
1889 1895   */
1890 1896  static void
1891 1897  stkinfo_begin(kthread_t *t)
1892 1898  {
1893 1899          caddr_t start;  /* stack start */
1894 1900          caddr_t end;    /* stack end  */
1895 1901          uint64_t *ptr;  /* pattern pointer */
1896 1902  
1897 1903          /*
1898 1904           * Stack grows up or down, see thread_create(),
1899 1905           * compute stack memory area start and end (start < end).
1900 1906           */
1901 1907          if (t->t_stk > t->t_stkbase) {
1902 1908                  /* stack grows down */
1903 1909                  start = t->t_stkbase;
1904 1910                  end = t->t_stk;
1905 1911          } else {
1906 1912                  /* stack grows up */
1907 1913                  start = t->t_stk;
1908 1914                  end = t->t_stkbase;
1909 1915          }
1910 1916  
1911 1917          /*
1912 1918           * Stackinfo pattern size is 8 bytes. Ensure proper 8 bytes
1913 1919           * alignement for start and end in stack area boundaries
1914 1920           * (protection against corrupt t_stkbase/t_stk data).
1915 1921           */
1916 1922          if ((((uintptr_t)start) & 0x7) != 0) {
1917 1923                  start = (caddr_t)((((uintptr_t)start) & (~0x7)) + 8);
1918 1924          }
1919 1925          end = (caddr_t)(((uintptr_t)end) & (~0x7));
1920 1926  
1921 1927          if ((end <= start) || (end - start) > (1024 * 1024)) {
1922 1928                  /* negative or stack size > 1 meg, assume bogus */
1923 1929                  return;
1924 1930          }
1925 1931  
1926 1932          /* fill stack area with a pattern (instead of zeros) */
1927 1933          ptr = (uint64_t *)((void *)start);
1928 1934          while (ptr < (uint64_t *)((void *)end)) {
1929 1935                  *ptr++ = KMEM_STKINFO_PATTERN;
1930 1936          }
1931 1937  }
1932 1938  
1933 1939  
1934 1940  /*
1935 1941   * Tunable kmem_stackinfo is set, create stackinfo log if doesn't already exist,
1936 1942   * compute the percentage of kernel stack really used, and set in the log
1937 1943   * if it's the latest highest percentage.
1938 1944   */
1939 1945  static void
1940 1946  stkinfo_end(kthread_t *t)
1941 1947  {
1942 1948          caddr_t start;  /* stack start */
1943 1949          caddr_t end;    /* stack end  */
1944 1950          uint64_t *ptr;  /* pattern pointer */
1945 1951          size_t stksz;   /* stack size */
1946 1952          size_t smallest = 0;
1947 1953          size_t percent = 0;
1948 1954          uint_t index = 0;
1949 1955          uint_t i;
1950 1956          static size_t smallest_percent = (size_t)-1;
1951 1957          static uint_t full = 0;
1952 1958  
1953 1959          /* create the stackinfo log, if doesn't already exist */
1954 1960          mutex_enter(&kmem_stkinfo_lock);
1955 1961          if (kmem_stkinfo_log == NULL) {
1956 1962                  kmem_stkinfo_log = (kmem_stkinfo_t *)
1957 1963                      kmem_zalloc(KMEM_STKINFO_LOG_SIZE *
1958 1964                      (sizeof (kmem_stkinfo_t)), KM_NOSLEEP);
1959 1965                  if (kmem_stkinfo_log == NULL) {
1960 1966                          mutex_exit(&kmem_stkinfo_lock);
1961 1967                          return;
1962 1968                  }
1963 1969          }
1964 1970          mutex_exit(&kmem_stkinfo_lock);
1965 1971  
1966 1972          /*
1967 1973           * Stack grows up or down, see thread_create(),
1968 1974           * compute stack memory area start and end (start < end).
1969 1975           */
1970 1976          if (t->t_stk > t->t_stkbase) {
1971 1977                  /* stack grows down */
1972 1978                  start = t->t_stkbase;
1973 1979                  end = t->t_stk;
1974 1980          } else {
1975 1981                  /* stack grows up */
1976 1982                  start = t->t_stk;
1977 1983                  end = t->t_stkbase;
1978 1984          }
1979 1985  
1980 1986          /* stack size as found in kthread_t */
1981 1987          stksz = end - start;
1982 1988  
1983 1989          /*
1984 1990           * Stackinfo pattern size is 8 bytes. Ensure proper 8 bytes
1985 1991           * alignement for start and end in stack area boundaries
1986 1992           * (protection against corrupt t_stkbase/t_stk data).
1987 1993           */
1988 1994          if ((((uintptr_t)start) & 0x7) != 0) {
1989 1995                  start = (caddr_t)((((uintptr_t)start) & (~0x7)) + 8);
1990 1996          }
1991 1997          end = (caddr_t)(((uintptr_t)end) & (~0x7));
1992 1998  
1993 1999          if ((end <= start) || (end - start) > (1024 * 1024)) {
1994 2000                  /* negative or stack size > 1 meg, assume bogus */
1995 2001                  return;
1996 2002          }
1997 2003  
1998 2004          /* search until no pattern in the stack */
1999 2005          if (t->t_stk > t->t_stkbase) {
2000 2006                  /* stack grows down */
2001 2007  #if defined(__i386) || defined(__amd64)
2002 2008                  /*
2003 2009                   * 6 longs are pushed on stack, see thread_load(). Skip
2004 2010                   * them, so if kthread has never run, percent is zero.
2005 2011                   * 8 bytes alignement is preserved for a 32 bit kernel,
2006 2012                   * 6 x 4 = 24, 24 is a multiple of 8.
2007 2013                   *
2008 2014                   */
2009 2015                  end -= (6 * sizeof (long));
2010 2016  #endif
2011 2017                  ptr = (uint64_t *)((void *)start);
2012 2018                  while (ptr < (uint64_t *)((void *)end)) {
2013 2019                          if (*ptr != KMEM_STKINFO_PATTERN) {
2014 2020                                  percent = stkinfo_percent(end,
2015 2021                                      start, (caddr_t)ptr);
2016 2022                                  break;
2017 2023                          }
2018 2024                          ptr++;
2019 2025                  }
2020 2026          } else {
2021 2027                  /* stack grows up */
2022 2028                  ptr = (uint64_t *)((void *)end);
2023 2029                  ptr--;
2024 2030                  while (ptr >= (uint64_t *)((void *)start)) {
2025 2031                          if (*ptr != KMEM_STKINFO_PATTERN) {
2026 2032                                  percent = stkinfo_percent(start,
2027 2033                                      end, (caddr_t)ptr);
2028 2034                                  break;
2029 2035                          }
2030 2036                          ptr--;
2031 2037                  }
2032 2038          }
2033 2039  
2034 2040          DTRACE_PROBE3(stack__usage, kthread_t *, t,
2035 2041              size_t, stksz, size_t, percent);
2036 2042  
2037 2043          if (percent == 0) {
2038 2044                  return;
2039 2045          }
2040 2046  
2041 2047          mutex_enter(&kmem_stkinfo_lock);
2042 2048          if (full == KMEM_STKINFO_LOG_SIZE && percent < smallest_percent) {
2043 2049                  /*
2044 2050                   * The log is full and already contains the highest values
2045 2051                   */
2046 2052                  mutex_exit(&kmem_stkinfo_lock);
2047 2053                  return;
2048 2054          }
2049 2055  
2050 2056          /* keep a log of the highest used stack */
2051 2057          for (i = 0; i < KMEM_STKINFO_LOG_SIZE; i++) {
2052 2058                  if (kmem_stkinfo_log[i].percent == 0) {
2053 2059                          index = i;
2054 2060                          full++;
2055 2061                          break;
2056 2062                  }
2057 2063                  if (smallest == 0) {
2058 2064                          smallest = kmem_stkinfo_log[i].percent;
2059 2065                          index = i;
2060 2066                          continue;
2061 2067                  }
2062 2068                  if (kmem_stkinfo_log[i].percent < smallest) {
2063 2069                          smallest = kmem_stkinfo_log[i].percent;
2064 2070                          index = i;
2065 2071                  }
2066 2072          }
2067 2073  
2068 2074          if (percent >= kmem_stkinfo_log[index].percent) {
2069 2075                  kmem_stkinfo_log[index].kthread = (caddr_t)t;
2070 2076                  kmem_stkinfo_log[index].t_startpc = (caddr_t)t->t_startpc;
2071 2077                  kmem_stkinfo_log[index].start = start;
2072 2078                  kmem_stkinfo_log[index].stksz = stksz;
2073 2079                  kmem_stkinfo_log[index].percent = percent;
2074 2080                  kmem_stkinfo_log[index].t_tid = t->t_tid;
2075 2081                  kmem_stkinfo_log[index].cmd[0] = '\0';
2076 2082                  if (t->t_tid != 0) {
2077 2083                          stksz = strlen((t->t_procp)->p_user.u_comm);
2078 2084                          if (stksz >= KMEM_STKINFO_STR_SIZE) {
2079 2085                                  stksz = KMEM_STKINFO_STR_SIZE - 1;
2080 2086                                  kmem_stkinfo_log[index].cmd[stksz] = '\0';
2081 2087                          } else {
2082 2088                                  stksz += 1;
2083 2089                          }
2084 2090                          (void) memcpy(kmem_stkinfo_log[index].cmd,
2085 2091                              (t->t_procp)->p_user.u_comm, stksz);
2086 2092                  }
2087 2093                  if (percent < smallest_percent) {
2088 2094                          smallest_percent = percent;
2089 2095                  }
2090 2096          }
2091 2097          mutex_exit(&kmem_stkinfo_lock);
2092 2098  }
2093 2099  
/*
 * Tunable kmem_stackinfo is set, compute stack utilization percentage.
 *
 *      t_stk           address the stack starts growing from
 *      t_stkbase       opposite end of the stack area
 *      sp              deepest address found in use
 *
 * The growth direction is taken from the order of t_stk and t_stkbase.
 * Returns 0 if sp lies before the start of the stack, 100 if it lies past
 * the far end, otherwise a value in 1..100.
 */
static size_t
stkinfo_percent(caddr_t t_stk, caddr_t t_stkbase, caddr_t sp)
{
        size_t used;    /* bytes between t_stk and sp, inclusive */
        size_t total;   /* bytes between t_stk and t_stkbase, inclusive */
        size_t pct;

        if (t_stk > t_stkbase) {
                /* stack grows down */
                if (sp > t_stk) {
                        return (0);
                }
                if (sp < t_stkbase) {
                        return (100);
                }
                used = t_stk - sp + 1;
                total = t_stk - t_stkbase + 1;
        } else {
                /* stack grows up */
                if (sp < t_stk) {
                        return (0);
                }
                if (sp > t_stkbase) {
                        return (100);
                }
                used = sp - t_stk + 1;
                total = t_stkbase - t_stk + 1;
        }

        /* the + 1 means any use at all reports at least 1 percent */
        pct = ((100 * used) / total) + 1;
        return (pct > 100 ? 100 : pct);
}
     2136 +
     2137 +/*
     2138 + * NOTE: This will silently truncate a name > THREAD_NAME_MAX - 1 characters
     2139 + * long.  It is expected that callers (acting on behalf of userland clients)
     2140 + * will perform any required checks to return the correct error semantics.
     2141 + * It is also expected callers on behalf of userland clients have done
     2142 + * any necessary permission checks.
     2143 + */
     2144 +int
     2145 +thread_setname(kthread_t *t, const char *name)
     2146 +{
     2147 +        char *buf = NULL;
     2148 +
     2149 +        /*
     2150 +         * We optimistically assume that a thread's name will only be set
     2151 +         * once and so allocate memory in preparation of setting t_name.
     2152 +         * If it turns out a name has already been set, we just discard (free)
     2153 +         * the buffer we just allocated and reuse the current buffer
     2154 +         * (as all should be THREAD_NAME_MAX large).
     2155 +         *
     2156 +         * Such an arrangement means over the lifetime of a kthread_t, t_name
     2157 +         * is either NULL or has one value (the address of the buffer holding
     2158 +         * the current thread name).   The assumption is that most kthread_t
     2159 +         * instances will not have a name assigned, so dynamically allocating
     2160 +         * the memory should minimize the footprint of this feature, but by
     2161 +         * having the buffer persist for the life of the thread, it simplifies
     2162 +         * usage in highly constrained situations (e.g. dtrace).
     2163 +         */
     2164 +        if (name != NULL && name[0] != '\0') {
     2165 +                for (size_t i = 0; name[i] != '\0'; i++) {
     2166 +                        if (!isprint(name[i]))
     2167 +                                return (EINVAL);
     2168 +                }
     2169 +
     2170 +                buf = kmem_zalloc(THREAD_NAME_MAX, KM_SLEEP);
     2171 +                (void) strlcpy(buf, name, THREAD_NAME_MAX);
     2172 +        }
     2173 +
     2174 +        mutex_enter(&ttoproc(t)->p_lock);
     2175 +        if (t->t_name == NULL) {
     2176 +                t->t_name = buf;
     2177 +        } else {
     2178 +                if (buf != NULL) {
     2179 +                        (void) strlcpy(t->t_name, name, THREAD_NAME_MAX);
     2180 +                        kmem_free(buf, THREAD_NAME_MAX);
     2181 +                } else {
     2182 +                        bzero(t->t_name, THREAD_NAME_MAX);
     2183 +                }
     2184 +        }
     2185 +        mutex_exit(&ttoproc(t)->p_lock);
     2186 +        return (0);
     2187 +}
     2188 +
     2189 +int
     2190 +thread_vsetname(kthread_t *t, const char *fmt, ...)
     2191 +{
     2192 +        char name[THREAD_NAME_MAX];
     2193 +        va_list va;
     2194 +        int rc;
     2195 +
     2196 +        va_start(va, fmt);
     2197 +        rc = vsnprintf(name, sizeof (name), fmt, va);
     2198 +        va_end(va);
     2199 +
     2200 +        if (rc < 0)
     2201 +                return (EINVAL);
     2202 +
     2203 +        if (rc >= sizeof (name))
     2204 +                return (ENAMETOOLONG);
     2205 +
     2206 +        return (thread_setname(t, name));
2129 2207  }

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX