illumos-gate Wdiff usr/src/uts/common/disp/thread.c

Print this page

3625 we only need one thread_create_intr

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/disp/thread.c
          +++ new/usr/src/uts/common/disp/thread.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  
  26   26  #include <sys/types.h>
  27   27  #include <sys/param.h>
  28   28  #include <sys/sysmacros.h>
  29   29  #include <sys/signal.h>
  30   30  #include <sys/stack.h>
  31   31  #include <sys/pcb.h>
  32   32  #include <sys/user.h>
  33   33  #include <sys/systm.h>
  34   34  #include <sys/sysinfo.h>
  35   35  #include <sys/errno.h>
  36   36  #include <sys/cmn_err.h>
  37   37  #include <sys/cred.h>
  38   38  #include <sys/resource.h>
  39   39  #include <sys/task.h>
  40   40  #include <sys/project.h>
  41   41  #include <sys/proc.h>
  42   42  #include <sys/debug.h>
  43   43  #include <sys/disp.h>
  44   44  #include <sys/class.h>
  45   45  #include <vm/seg_kmem.h>
  46   46  #include <vm/seg_kp.h>
  47   47  #include <sys/machlock.h>
  48   48  #include <sys/kmem.h>
  49   49  #include <sys/varargs.h>
  50   50  #include <sys/turnstile.h>
  51   51  #include <sys/poll.h>
  52   52  #include <sys/vtrace.h>
  53   53  #include <sys/callb.h>
  54   54  #include <c2/audit.h>
  55   55  #include <sys/tnf.h>
  56   56  #include <sys/sobject.h>
  57   57  #include <sys/cpupart.h>
  58   58  #include <sys/pset.h>
  59   59  #include <sys/door.h>
  60   60  #include <sys/spl.h>
  61   61  #include <sys/copyops.h>
  62   62  #include <sys/rctl.h>
  63   63  #include <sys/brand.h>
  64   64  #include <sys/pool.h>
  65   65  #include <sys/zone.h>
  66   66  #include <sys/tsol/label.h>
  67   67  #include <sys/tsol/tndb.h>
  68   68  #include <sys/cpc_impl.h>
  69   69  #include <sys/sdt.h>
  70   70  #include <sys/reboot.h>
  71   71  #include <sys/kdi.h>
  72   72  #include <sys/schedctl.h>
  73   73  #include <sys/waitq.h>
  74   74  #include <sys/cpucaps.h>
  75   75  #include <sys/kiconv.h>
  76   76  
  77   77  struct kmem_cache *thread_cache;        /* cache of free threads */
  78   78  struct kmem_cache *lwp_cache;           /* cache of free lwps */
  79   79  struct kmem_cache *turnstile_cache;     /* cache of free turnstiles */
  80   80  
  81   81  /*
  82   82   * allthreads is only for use by kmem_readers.  All kernel loops can use
  83   83   * the current thread as a start/end point.
  84   84   */
  85   85  static kthread_t *allthreads = &t0;     /* circular list of all threads */
  86   86  
  87   87  static kcondvar_t reaper_cv;            /* synchronization var */
  88   88  kthread_t       *thread_deathrow;       /* circular list of reapable threads */
  89   89  kthread_t       *lwp_deathrow;          /* circular list of reapable threads */
  90   90  kmutex_t        reaplock;               /* protects lwp and thread deathrows */
  91   91  int     thread_reapcnt = 0;             /* number of threads on deathrow */
  92   92  int     lwp_reapcnt = 0;                /* number of lwps on deathrow */
  93   93  int     reaplimit = 16;                 /* delay reaping until reaplimit */
  94   94  
  95   95  thread_free_lock_t      *thread_free_lock;
  96   96                                          /* protects tick thread from reaper */
  97   97  
           /* Defined elsewhere; incremented under pidlock by thread_create(). */
  98   98  extern int nthread;
  99   99  
 100  100  /* System Scheduling classes. */
 101  101  id_t    syscid;                         /* system scheduling class ID */
 102  102  id_t    sysdccid = CLASS_UNUSED;        /* reset when SDC loads */
 103  103  
 104  104  void    *segkp_thread;                  /* cookie for segkp pool */
 105  105  
           /*
            * Sizes handed to segkp_cache_init() by thread_init() for the segkp_lwp
            * and segkp_thread pools, respectively.
            */
 106  106  int lwp_cache_sz = 32;
 107  107  int t_cache_sz = 8;
           /* Next t_did to assign; advanced under pidlock in thread_create(). */
 108  108  static kt_did_t next_t_id = 1;
 109  109  
 110  110  /* Default mode for thread binding to CPUs and processor sets */
 111  111  int default_binding_mode = TB_ALLHARD;
 112  112  
 113  113  /*
 114  114   * Min/Max stack sizes for stack size parameters
 115  115   */
 116  116  #define MAX_STKSIZE     (32 * DEFAULTSTKSZ)
 117  117  #define MIN_STKSIZE     DEFAULTSTKSZ
 118  118  
 119  119  /*
 120  120   * default_stksize overrides lwp_default_stksize if it is set to a legal
            * stack size.  thread_init() checks both values for PAGESIZE alignment
            * and against MIN_STKSIZE/MAX_STKSIZE, and fills in any that are zero.
 121  121   */
 122  122  int     default_stksize;
 123  123  int     lwp_default_stksize;
 124  124  
 125  125  static zone_key_t zone_thread_key;
 126  126  
 127  127  unsigned int kmem_stackinfo;            /* stackinfo feature on-off */
 128  128  kmem_stkinfo_t *kmem_stkinfo_log;       /* stackinfo circular log */
 129  129  static kmutex_t kmem_stkinfo_lock;      /* protects kmem_stkinfo_log */
 130  130  
 131  131  /*
 132  132   * forward declarations for internal thread specific data (tsd)
 133  133   */
 134  134  static void *tsd_realloc(void *, size_t, size_t);
 135  135  
 136  136  void thread_reaper(void);
 137  137  
 138  138  /* forward declarations for stackinfo feature */
 139  139  static void stkinfo_begin(kthread_t *);
 140  140  static void stkinfo_end(kthread_t *);
 141  141  static size_t stkinfo_percent(caddr_t, caddr_t, caddr_t);
 142  142  
 143  143  /*ARGSUSED*/
 144  144  static int
 145  145  turnstile_constructor(void *buf, void *cdrarg, int kmflags)
 146  146  {
                   /*
                    * Constructor callback given to kmem_cache_create() for
                    * turnstile_cache in thread_init().  It zero-fills the new
                    * turnstile_t and always returns 0; cdrarg and kmflags are
                    * not used.
                    */
 147  147          bzero(buf, sizeof (turnstile_t));
 148  148          return (0);
 149  149  }
 150  150  
 151  151  /*ARGSUSED*/
 152  152  static void
 153  153  turnstile_destructor(void *buf, void *cdrarg)
 154  154  {
                   /*
                    * Destructor callback given to kmem_cache_create() for
                    * turnstile_cache in thread_init().  It releases nothing; it
                    * only ASSERTs that the turnstile is idle: ts_free and
                    * ts_inheritor are NULL, there are no waiters, and both
                    * sleep queues are empty.  cdrarg is not used.
                    */
 155  155          turnstile_t *ts = buf;
 156  156  
 157  157          ASSERT(ts->ts_free == NULL);
 158  158          ASSERT(ts->ts_waiters == 0);
 159  159          ASSERT(ts->ts_inheritor == NULL);
 160  160          ASSERT(ts->ts_sleepq[0].sq_first == NULL);
 161  161          ASSERT(ts->ts_sleepq[1].sq_first == NULL);
 162  162  }
 163  163  
 164  164  void
 165  165  thread_init(void)
 166  166  {
                   /*
                    * Set up the thread subsystem; curthread is t0 while this runs.
                    * In order: initialize reaplock and the thread_free_lock array,
                    * create the thread, lwp and turnstile kmem caches, call the
                    * *_init() routines of the facilities listed below, validate
                    * the stack size tunables, build the segkp stack caches, and
                    * create this CPU's idle thread and the thread_reaper daemon.
                    */
 167  167          kthread_t *tp;
 168  168          extern char sys_name[];
 169  169          extern void idle();
 170  170          struct cpu *cpu = CPU;
 171  171          int i;
 172  172          kmutex_t *lp;
 173  173  
 174  174          mutex_init(&reaplock, NULL, MUTEX_SPIN, (void *)ipltospl(DISP_LEVEL));
                   /*
                    * One mutex per hash bucket; buckets are selected with
                    * THREAD_FREE_HASH() in thread_free_prevent() and friends.
                    */
 175  175          thread_free_lock =
 176  176              kmem_alloc(sizeof (thread_free_lock_t) * THREAD_FREE_NUM, KM_SLEEP);
 177  177          for (i = 0; i < THREAD_FREE_NUM; i++) {
 178  178                  lp = &thread_free_lock[i].tf_lock;
 179  179                  mutex_init(lp, NULL, MUTEX_DEFAULT, NULL);
 180  180          }
 181  181  
 182  182  #if defined(__i386) || defined(__amd64)
 183  183          thread_cache = kmem_cache_create("thread_cache", sizeof (kthread_t),
 184  184              PTR24_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
 185  185  
 186  186          /*
 187  187           * "struct _klwp" includes a "struct pcb", which includes a
 188  188           * "struct fpu", which needs to be 64-byte aligned on amd64
 189  189           * (and even on i386) for xsave/xrstor.
 190  190           */
 191  191          lwp_cache = kmem_cache_create("lwp_cache", sizeof (klwp_t),
 192  192              64, NULL, NULL, NULL, NULL, NULL, 0);
 193  193  #else
 194  194          /*
 195  195           * Allocate thread structures from static_arena.  This prevents
 196  196           * issues where a thread tries to relocate its own thread
 197  197           * structure and touches it after the mapping has been suspended.
 198  198           */
 199  199          thread_cache = kmem_cache_create("thread_cache", sizeof (kthread_t),
 200  200              PTR24_ALIGN, NULL, NULL, NULL, NULL, static_arena, 0);
 201  201  
 202  202          lwp_stk_cache_init();
 203  203  
 204  204          lwp_cache = kmem_cache_create("lwp_cache", sizeof (klwp_t),
 205  205              0, NULL, NULL, NULL, NULL, NULL, 0);
 206  206  #endif
 207  207  
 208  208          turnstile_cache = kmem_cache_create("turnstile_cache",
 209  209              sizeof (turnstile_t), 0,
 210  210              turnstile_constructor, turnstile_destructor, NULL, NULL, NULL, 0);
 211  211  
 212  212          label_init();
 213  213          cred_init();
 214  214  
 215  215          /*
 216  216           * Initialize various resource management facilities.
 217  217           */
 218  218          rctl_init();
 219  219          cpucaps_init();
 220  220          /*
 221  221           * Zone_init() should be called before project_init() so that project ID
 222  222           * for the first project is initialized correctly.
 223  223           */
 224  224          zone_init();
 225  225          project_init();
 226  226          brand_init();
 227  227          kiconv_init();
 228  228          task_init();
 229  229          tcache_init();
 230  230          pool_init();
 231  231  
                   /*
                    * Give the current thread (t0) its turnstile, the same way
                    * thread_create() does for every thread it builds.
                    */
 232  232          curthread->t_ts = kmem_cache_alloc(turnstile_cache, KM_SLEEP);
 233  233  
 234  234          /*
 235  235           * Originally, we had two parameters to set default stack
 236  236           * size: one for lwp's (lwp_default_stksize), and one for
 237  237           * kernel-only threads (DEFAULTSTKSZ, a.k.a. _defaultstksz).
 238  238           * Now we have a third parameter that overrides both if it is
 239  239           * set to a legal stack size, called default_stksize.
 240  240           */
 241  241  
 242  242          if (default_stksize == 0) {
 243  243                  default_stksize = DEFAULTSTKSZ;
 244  244          } else if (default_stksize % PAGESIZE != 0 ||
 245  245              default_stksize > MAX_STKSIZE ||
 246  246              default_stksize < MIN_STKSIZE) {
 247  247                  cmn_err(CE_WARN, "Illegal stack size. Using %d",
 248  248                      (int)DEFAULTSTKSZ);
 249  249                  default_stksize = DEFAULTSTKSZ;
 250  250          } else {
 251  251                  lwp_default_stksize = default_stksize;
 252  252          }
 253  253  
                   /*
                    * An unset or illegal lwp_default_stksize falls back to the
                    * default_stksize settled on above.
                    */
 254  254          if (lwp_default_stksize == 0) {
 255  255                  lwp_default_stksize = default_stksize;
 256  256          } else if (lwp_default_stksize % PAGESIZE != 0 ||
 257  257              lwp_default_stksize > MAX_STKSIZE ||
 258  258              lwp_default_stksize < MIN_STKSIZE) {
 259  259                  cmn_err(CE_WARN, "Illegal stack size. Using %d",
 260  260                      default_stksize);
 261  261                  lwp_default_stksize = default_stksize;
 262  262          }
 263  263  
 264  264          segkp_lwp = segkp_cache_init(segkp, lwp_cache_sz,
 265  265              lwp_default_stksize,
 266  266              (KPD_NOWAIT | KPD_HASREDZONE | KPD_LOCKED));
 267  267  
 268  268          segkp_thread = segkp_cache_init(segkp, t_cache_sz,
 269  269              default_stksize, KPD_HASREDZONE | KPD_LOCKED | KPD_NO_ANON);
 270  270  
 271  271          (void) getcid(sys_name, &syscid);
 272  272          curthread->t_cid = syscid;      /* current thread is t0 */
 273  273  
 274  274          /*
 275  275           * Set up the first CPU's idle thread.
 276  276           * It runs whenever the CPU has nothing worthwhile to do.
                    * The NULL stack argument makes thread_create() allocate a
                    * stack of at least default_stksize.  The thread is created
                    * TS_STOPPED and is then tied to this CPU by hand: dispatch
                    * queue, bound CPU and affinity count are set below.
 277  277           */
 278  278          tp = thread_create(NULL, 0, idle, NULL, 0, &p0, TS_STOPPED, -1);
 279  279          cpu->cpu_idle_thread = tp;
 280  280          tp->t_preempt = 1;
 281  281          tp->t_disp_queue = cpu->cpu_disp;
 282  282          ASSERT(tp->t_disp_queue != NULL);
 283  283          tp->t_bound_cpu = cpu;
 284  284          tp->t_affinitycnt = 1;
 285  285  
 286  286          /*
 287  287           * Registering a thread in the callback table is usually
 288  288           * done in the initialization code of the thread. In this
 289  289           * case, we do it right after thread creation to avoid
 290  290           * blocking idle thread while registering itself. It also
 291  291           * avoids the possibility of reregistration in case a CPU
 292  292           * restarts its idle thread.
 293  293           */
 294  294          CALLB_CPR_INIT_SAFE(tp, "idle");
 295  295  
 296  296          /*
 297  297           * Create the thread_reaper daemon. From this point on, exited
 298  298           * threads will get reaped.
 299  299           */
 300  300          (void) thread_create(NULL, 0, (void (*)())thread_reaper,
 301  301              NULL, 0, &p0, TS_RUN, minclsyspri);
 302  302  
 303  303          /*
 304  304           * Finish initializing the kernel memory allocator now that
 305  305           * thread_create() is available.
 306  306           */
 307  307          kmem_thread_init();
 308  308  
 309  309          if (boothowto & RB_DEBUG)
 310  310                  kdi_dvec_thravail();
 311  311  }

↓ open down ↓

311 lines elided

↑ open up ↑

 312  312  
 313  313  /*
 314  314   * Create a thread.
 315  315   *
 316  316   * thread_create() blocks for memory if necessary.  It never fails.
 317  317   *
 318  318   * If stk is NULL, the thread is created at the base of the stack
 319  319   * and cannot be swapped.
            *
            * Arguments:
            *   stk      caller-supplied stack, or NULL to take the kthread_t and
            *            its stack together from segkp.
            *   stksize  stack size in bytes.  With a NULL stk, values below
            *            default_stksize are raised to it and any other size is
            *            rounded up to a multiple of PAGESIZE.
            *   proc     start routine, recorded in t_startpc.  When NULL, no
            *            stack initialization is done here and the caller must
            *            do its own.
            *   arg,len  passed to thread_load() together with proc.
            *   pp       owning process: supplies t_procp, t_plockp and, when
            *            p_cred is set, the credential the thread holds.
            *   state    TS_RUN, TS_ONPROC, TS_FREE or TS_STOPPED; any other
            *            value panics.
            *   pri      initial value of t_pri.
            *
            * Returns the new thread, already linked onto the list of all threads.
 320  320   */
 321  321  kthread_t *
 322      -thread_create(
 323      -        caddr_t stk,
 324      -        size_t  stksize,
 325      -        void    (*proc)(),
 326      -        void    *arg,
 327      -        size_t  len,
 328      -        proc_t   *pp,
 329      -        int     state,
 330      -        pri_t   pri)
      322 +thread_create(caddr_t stk, size_t stksize, void (*proc)(), void *arg,
      323 +    size_t len, proc_t *pp, int state, pri_t pri)
 331  324  {
 332  325          kthread_t *t;
 333  326          extern struct classfuncs sys_classfuncs;
 334  327          turnstile_t *ts;
 335  328  
 336  329          /*
 337  330           * Every thread keeps a turnstile around in case it needs to block.
 338  331           * The only reason the turnstile is not simply part of the thread
 339  332           * structure is that we may have to break the association whenever
 340  333           * more than one thread blocks on a given synchronization object.

 341  334           * From a memory-management standpoint, turnstiles are like the
 342  335           * "attached mblks" that hang off dblks in the streams allocator.
 343  336           */
 344  337          ts = kmem_cache_alloc(turnstile_cache, KM_SLEEP);
 345  338  
 346  339          if (stk == NULL) {
 347  340                  /*
 348  341                   * alloc both thread and stack in segkp chunk
 349  342                   */
 350  343  
 351  344                  if (stksize < default_stksize)
 352  345                          stksize = default_stksize;
 353  346  
                           /*
                            * Default-sized stacks come from the segkp_thread
                            * cache; any other size is a direct segkp_get().
                            */
 354  347                  if (stksize == default_stksize) {
 355  348                          stk = (caddr_t)segkp_cache_get(segkp_thread);
 356  349                  } else {
 357  350                          stksize = roundup(stksize, PAGESIZE);
 358  351                          stk = (caddr_t)segkp_get(segkp, stksize,
 359  352                              (KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED));
 360  353                  }
 361  354  
 362  355                  ASSERT(stk != NULL);
 363  356  
 364  357                  /*
 365  358                   * The machine-dependent mutex code may require that
 366  359                   * thread pointers (since they may be used for mutex owner
 367  360                   * fields) have certain alignment requirements.
 368  361                   * PTR24_ALIGN is the size of the alignment quanta.
 369  362                   * XXX - assumes stack grows toward low addresses.
 370  363                   */
 371  364                  if (stksize <= sizeof (kthread_t) + PTR24_ALIGN)
 372  365                          cmn_err(CE_PANIC, "thread_create: proposed stack size"
 373  366                              " too small to hold thread.");
 374  367  #ifdef STACK_GROWTH_DOWN
                           /*
                            * Place the kthread_t at the high end of the chunk,
                            * aligned down to PTR24_ALIGN; the stack starts just
                            * below it.
                            */
 375  368                  stksize -= SA(sizeof (kthread_t) + PTR24_ALIGN - 1);
 376  369                  stksize &= -PTR24_ALIGN;        /* make thread aligned */
 377  370                  t = (kthread_t *)(stk + stksize);
 378  371                  bzero(t, sizeof (kthread_t));
 379  372                  if (audit_active)
 380  373                          audit_thread_create(t);
 381  374                  t->t_stk = stk + stksize;
 382  375                  t->t_stkbase = stk;
 383  376  #else   /* stack grows to larger addresses */
                           /*
                            * NOTE(review): unlike the other two allocation paths,
                            * this branch never calls audit_thread_create() --
                            * confirm whether that is intentional.
                            */
 384  377                  stksize -= SA(sizeof (kthread_t));
 385  378                  t = (kthread_t *)(stk);
 386  379                  bzero(t, sizeof (kthread_t));
 387  380                  t->t_stk = stk + sizeof (kthread_t);
 388  381                  t->t_stkbase = stk + stksize + sizeof (kthread_t);
 389  382  #endif  /* STACK_GROWTH_DOWN */
 390  383                  t->t_flag |= T_TALLOCSTK;
 391  384                  t->t_swap = stk;
 392  385          } else {
                           /*
                            * The caller owns the stack; only the kthread_t
                            * itself comes from thread_cache.
                            */
 393  386                  t = kmem_cache_alloc(thread_cache, KM_SLEEP);
 394  387                  bzero(t, sizeof (kthread_t));
 395  388                  ASSERT(((uintptr_t)t & (PTR24_ALIGN - 1)) == 0);
 396  389                  if (audit_active)
 397  390                          audit_thread_create(t);
 398  391                  /*
 399  392                   * Initialize t_stk to the kernel stack pointer to use
 400  393                   * upon entry to the kernel
 401  394                   */
 402  395  #ifdef STACK_GROWTH_DOWN
 403  396                  t->t_stk = stk + stksize;
 404  397                  t->t_stkbase = stk;
 405  398  #else
 406  399                  t->t_stk = stk;                 /* 3b2-like */
 407  400                  t->t_stkbase = stk + stksize;
 408  401  #endif /* STACK_GROWTH_DOWN */
 409  402          }
 410  403  
 411  404          if (kmem_stackinfo != 0) {
 412  405                  stkinfo_begin(t);
 413  406          }
 414  407  
 415  408          t->t_ts = ts;
 416  409  
 417  410          /*
 418  411           * p_cred could be NULL if thread_create is called before cred_init
 419  412           * is called in main.
 420  413           */
 421  414          mutex_enter(&pp->p_crlock);
 422  415          if (pp->p_cred)
 423  416                  crhold(t->t_cred = pp->p_cred);
 424  417          mutex_exit(&pp->p_crlock);
 425  418          t->t_start = gethrestime_sec();
 426  419          t->t_startpc = proc;
 427  420          t->t_procp = pp;
 428  421          t->t_clfuncs = &sys_classfuncs.thread;
 429  422          t->t_cid = syscid;
 430  423          t->t_pri = pri;
 431  424          t->t_stime = ddi_get_lbolt();
 432  425          t->t_schedflag = TS_LOAD | TS_DONT_SWAP;
 433  426          t->t_bind_cpu = PBIND_NONE;
 434  427          t->t_bindflag = (uchar_t)default_binding_mode;
 435  428          t->t_bind_pset = PS_NONE;
 436  429          t->t_plockp = &pp->p_lock;
 437  430          t->t_copyops = NULL;
 438  431          t->t_taskq = NULL;
 439  432          t->t_anttime = 0;
 440  433          t->t_hatdepth = 0;
 441  434  
 442  435          t->t_dtrace_vtime = 1;  /* assure vtimestamp is always non-zero */
 443  436  
 444  437          CPU_STATS_ADDQ(CPU, sys, nthreads, 1);
 445  438  #ifndef NPROBE
 446  439          /* Kernel probe */
 447  440          tnf_thread_create(t);
 448  441  #endif /* NPROBE */
 449  442          LOCK_INIT_CLEAR(&t->t_lock);
 450  443  
 451  444          /*
 452  445           * Callers who give us a NULL proc must do their own
 453  446           * stack initialization.  e.g. lwp_create()
 454  447           */
 455  448          if (proc != NULL) {
 456  449                  t->t_stk = thread_stk_init(t->t_stk);
 457  450                  thread_load(t, proc, arg, len);
 458  451          }
 459  452  
 460  453          /*
 461  454           * Put a hold on project0. If this thread is actually in a
 462  455           * different project, then t_proj will be changed later in
 463  456           * lwp_create().  All kernel-only threads must be in project 0.
 464  457           */
 465  458          t->t_proj = project_hold(proj0p);
 466  459  
 467  460          lgrp_affinity_init(&t->t_lgrp_affinity);
 468  461  
                   /*
                    * pidlock is held from here until the thread state has been
                    * set at the bottom of the function.
                    */
 469  462          mutex_enter(&pidlock);
 470  463          nthread++;
 471  464          t->t_did = next_t_id++;
 472  465          t->t_prev = curthread->t_prev;
 473  466          t->t_next = curthread;
 474  467  
 475  468          /*
 476  469           * Add the thread to the list of all threads, and initialize
 477  470           * its t_cpu pointer.  We need to block preemption since
 478  471           * cpu_offline walks the thread list looking for threads
 479  472           * with t_cpu pointing to the CPU being offlined.  We want
 480  473           * to make sure that the list is consistent and that if t_cpu
 481  474           * is set, the thread is on the list.
 482  475           */
 483  476          kpreempt_disable();
 484  477          curthread->t_prev->t_next = t;
 485  478          curthread->t_prev = t;
 486  479  
 487  480          /*
 488  481           * Threads should never have a NULL t_cpu pointer so assign it
 489  482           * here.  If the thread is being created with state TS_RUN a
 490  483           * better CPU may be chosen when it is placed on the run queue.
 491  484           *
 492  485           * We need to keep kernel preemption disabled when setting all
 493  486           * three fields to keep them in sync.  Also, always create in
 494  487           * the default partition since that's where kernel threads go
 495  488           * (if this isn't a kernel thread, t_cpupart will be changed
 496  489           * in lwp_create before setting the thread runnable).
 497  490           */
 498  491          t->t_cpupart = &cp_default;
 499  492  
 500  493          /*
 501  494           * For now, affiliate this thread with the root lgroup.
 502  495           * Since the kernel does not (presently) allocate its memory
 503  496           * in a locality aware fashion, the root is an appropriate home.
 504  497           * If this thread is later associated with an lwp, it will have
 505  498           * its lgroup re-assigned at that time.
 506  499           */
 507  500          lgrp_move_thread(t, &cp_default.cp_lgrploads[LGRP_ROOTID], 1);
 508  501  
 509  502          /*
 510  503           * Inherit the current cpu.  If this cpu isn't part of the chosen
 511  504           * lgroup, a new cpu will be chosen by cpu_choose when the thread
 512  505           * is ready to run.
 513  506           */
 514  507          if (CPU->cpu_part == &cp_default)
 515  508                  t->t_cpu = CPU;
 516  509          else
 517  510                  t->t_cpu = disp_lowpri_cpu(cp_default.cp_cpulist, t->t_lpl,
 518  511                      t->t_pri, NULL);
 519  512  
 520  513          t->t_disp_queue = t->t_cpu->cpu_disp;
 521  514          kpreempt_enable();
 522  515  
 523  516          /*
 524  517           * Initialize thread state and the dispatcher lock pointer.
 525  518           * Need to hold onto pidlock to block allthreads walkers until
 526  519           * the state is set.
 527  520           */
 528  521          switch (state) {
 529  522          case TS_RUN:
                           /*
                            * The thread is first marked TS_STOPPED under
                            * transition_lock and then handed to its scheduling
                            * class via CL_SETRUN().  thread_unlock() presumably
                            * also restores the spl saved in t_oldspl -- confirm.
                            */
 530  523                  curthread->t_oldspl = splhigh();        /* get dispatcher spl */
 531  524                  THREAD_SET_STATE(t, TS_STOPPED, &transition_lock);
 532  525                  CL_SETRUN(t);
 533  526                  thread_unlock(t);
 534  527                  break;
 535  528  
 536  529          case TS_ONPROC:
 537  530                  THREAD_ONPROC(t, t->t_cpu);
 538  531                  break;
 539  532  
 540  533          case TS_FREE:
 541  534                  /*
 542  535                   * Free state will be used for intr threads.
 543  536                   * The interrupt routine must set the thread dispatcher
 544  537                   * lock pointer (t_lockp) if starting on a CPU
 545  538                   * other than the current one.
 546  539                   */
 547  540                  THREAD_FREEINTR(t, CPU);
 548  541                  break;
 549  542  
 550  543          case TS_STOPPED:
 551  544                  THREAD_SET_STATE(t, TS_STOPPED, &stop_lock);
 552  545                  break;
 553  546  
 554  547          default:                        /* TS_SLEEP, TS_ZOMB or TS_TRANS */
 555  548                  cmn_err(CE_PANIC, "thread_create: invalid state %d", state);
 556  549          }
 557  550          mutex_exit(&pidlock);
 558  551          return (t);
 559  552  }
 560  553  
 561  554  /*
 562  555   * Move thread to project0 and take care of project reference counters.
            *
            * t_proj is switched to proj0p while the thread lock is held.  Only if
            * the thread was in a different project, and only after the thread lock
            * has been dropped, is the old project released and a hold taken on
            * proj0p.  The thread must be curthread, be in state TS_FREE, or belong
            * to p0 (ASSERTed).
 563  556   */
 564  557  void
 565  558  thread_rele(kthread_t *t)
 566  559  {
 567  560          kproject_t *kpj;
 568  561  
 569  562          thread_lock(t);
 570  563  
 571  564          ASSERT(t == curthread || t->t_state == TS_FREE || t->t_procp == &p0);
 572  565          kpj = ttoproj(t);
 573  566          t->t_proj = proj0p;
 574  567  
 575  568          thread_unlock(t);
 576  569  
 577  570          if (kpj != proj0p) {
 578  571                  project_rele(kpj);
 579  572                  (void) project_hold(proj0p);
 580  573          }
 581  574  }
 582  575  
 583  576  void
 584  577  thread_exit(void)
 585  578  {
                   /*
                    * Terminate the calling thread.  Panics if the thread has
                    * TP_ZTHREAD set.  Otherwise it tears down per-thread state
                    * (TSD, performance counters, door, TNF probe data), moves the
                    * thread to project 0, unlinks it from the list of all threads
                    * and wakes any thread_join() waiters, runs the context exit
                    * hooks, marks the thread TS_ZOMB and switches away.  It does
                    * not return.
                    */
 586  579          kthread_t *t = curthread;
 587  580  
 588  581          if ((t->t_proc_flag & TP_ZTHREAD) != 0)
 589  582                  cmn_err(CE_PANIC, "thread_exit: zthread_exit() not called");
 590  583  
 591  584          tsd_exit();             /* Clean up this thread's TSD */
 592  585  
 593  586          kcpc_passivate();       /* clean up performance counter state */
 594  587  
 595  588          /*
 596  589           * No kernel thread should have called poll() without arranging
 597  590           * for pollcleanup() to have been called by this point.
 598  591           */
 599  592          ASSERT(t->t_pollstate == NULL);
 600  593          ASSERT(t->t_schedctl == NULL);
 601  594          if (t->t_door)
 602  595                  door_slam();    /* in case thread did an upcall */
 603  596  
 604  597  #ifndef NPROBE
 605  598          /* Kernel probe */
 606  599          if (t->t_tnf_tpdp)
 607  600                  tnf_thread_exit();
 608  601  #endif /* NPROBE */
 609  602  
 610  603          thread_rele(t);
 611  604          t->t_preempt++;
 612  605  
 613  606          /*
 614  607           * remove thread from the all threads list so that
 615  608           * death-row can use the same pointers.
 616  609           */
 617  610          mutex_enter(&pidlock);
 618  611          t->t_next->t_prev = t->t_prev;
 619  612          t->t_prev->t_next = t->t_next;
 620  613          ASSERT(allthreads != t);        /* t0 never exits */
 621  614          cv_broadcast(&t->t_joincv);     /* wake up anyone in thread_join */
 622  615          mutex_exit(&pidlock);
 623  616  
 624  617          if (t->t_ctx != NULL)
 625  618                  exitctx(t);
 626  619          if (t->t_procp->p_pctx != NULL)
 627  620                  exitpctx(t->t_procp);
 628  621  
 629  622          if (kmem_stackinfo != 0) {
 630  623                  stkinfo_end(t);
 631  624          }
 632  625  
 633  626          t->t_state = TS_ZOMB;   /* set zombie thread */
 634  627  
 635  628          swtch_from_zombie();    /* give up the CPU */
 636  629          /* NOTREACHED */
 637  630  }
 638  631  
 639  632  /*
 640  633   * Check to see if the specified thread is active (defined as being on
 641  634   * the thread list).  This is certainly a slow way to do this; if there's
 642  635   * ever a reason to speed it up, we could maintain a hash table of active
 643  636   * threads indexed by their t_did.
            *
            * Returns the thread whose t_did equals tid, or NULL if no thread on
            * the list has that id.  The caller must hold pidlock (ASSERTed).
 644  637   */
 645  638  static kthread_t *
 646  639  did_to_thread(kt_did_t tid)
 647  640  {
 648  641          kthread_t *t;
 649  642  
 650  643          ASSERT(MUTEX_HELD(&pidlock));
 651  644          for (t = curthread->t_next; t != curthread; t = t->t_next) {
 652  645                  if (t->t_did == tid)
 653  646                          break;
 654  647          }
                   /*
                    * The walk ends either on a match or back at curthread, so
                    * this last test also covers curthread itself being the target.
                    */
 655  648          if (t->t_did == tid)
 656  649                  return (t);
 657  650          else
 658  651                  return (NULL);
 659  652  }
 660  653  
 661  654  /*
 662  655   * Wait for specified thread to exit.  Returns immediately if the thread
 663  656   * could not be found, meaning that it has either already exited or never
 664  657   * existed.
            *
            * tid must not be the caller's own t_did nor that of t0 (both ASSERTed).
 665  658   */
 666  659  void
 667  660  thread_join(kt_did_t tid)
 668  661  {
 669  662          kthread_t *t;
 670  663  
 671  664          ASSERT(tid != curthread->t_did);
 672  665          ASSERT(tid != t0.t_did);
 673  666  
 674  667          mutex_enter(&pidlock);
 675  668          /*
 676  669           * Make sure we check that the thread is on the thread list
 677  670           * before blocking on it; otherwise we could end up blocking on
 678  671           * a cv that's already been freed.  In other words, don't cache
 679  672           * the thread pointer across calls to cv_wait.
 680  673           *
 681  674           * The choice of loop invariant means that whenever a thread
 682  675           * is taken off the allthreads list, a cv_broadcast must be
 683  676           * performed on that thread's t_joincv to wake up any waiters.
 684  677           * The broadcast doesn't have to happen right away, but it
 685  678           * shouldn't be postponed indefinitely (e.g., by doing it in
 686  679           * thread_free which may only be executed when the deathrow
 687  680           * queue is processed).
 688  681           */
                   /* The assignment is intentional: look the thread up again each time. */
 689  682          while (t = did_to_thread(tid))
 690  683                  cv_wait(&t->t_joincv, &pidlock);
 691  684          mutex_exit(&pidlock);
 692  685  }
 693  686  
 694  687  void
 695  688  thread_free_prevent(kthread_t *t)
 696  689  {
                   /*
                    * Acquire the thread_free_lock bucket that t hashes to and
                    * return with it held.  thread_free_allow() releases the same
                    * bucket.
                    */
 697  690          kmutex_t *lp;
 698  691  
 699  692          lp = &thread_free_lock[THREAD_FREE_HASH(t)].tf_lock;
 700  693          mutex_enter(lp);
 701  694  }
 702  695  
 703  696  void
 704  697  thread_free_allow(kthread_t *t)
 705  698  {
                   /*
                    * Release the thread_free_lock bucket that t hashes to; the
                    * counterpart of thread_free_prevent().
                    */
 706  699          kmutex_t *lp;
 707  700  
 708  701          lp = &thread_free_lock[THREAD_FREE_HASH(t)].tf_lock;
 709  702          mutex_exit(lp);
 710  703  }
 711  704  
 712  705  static void
 713  706  thread_free_barrier(kthread_t *t)
 714  707  {
                   /*
                    * Take and immediately drop the thread_free_lock bucket that t
                    * hashes to.  This returns only once nobody holds that bucket,
                    * e.g. a caller between thread_free_prevent() and
                    * thread_free_allow().
                    */
 715  708          kmutex_t *lp;
 716  709  
 717  710          lp = &thread_free_lock[THREAD_FREE_HASH(t)].tf_lock;
 718  711          mutex_enter(lp);
 719  712          mutex_exit(lp);
 720  713  }
 721  714  
 722  715  void
 723  716  thread_free(kthread_t *t)
 724  717  {
 725  718          boolean_t allocstk = (t->t_flag & T_TALLOCSTK);
 726  719          klwp_t *lwp = t->t_lwp;
 727  720          caddr_t swap = t->t_swap;
 728  721  
 729  722          ASSERT(t != &t0 && t->t_state == TS_FREE);
 730  723          ASSERT(t->t_door == NULL);
 731  724          ASSERT(t->t_schedctl == NULL);
 732  725          ASSERT(t->t_pollstate == NULL);
 733  726  
 734  727          t->t_pri = 0;
 735  728          t->t_pc = 0;
 736  729          t->t_sp = 0;
 737  730          t->t_wchan0 = NULL;
 738  731          t->t_wchan = NULL;
 739  732          if (t->t_cred != NULL) {
 740  733                  crfree(t->t_cred);
 741  734                  t->t_cred = 0;
 742  735          }
 743  736          if (t->t_pdmsg) {
 744  737                  kmem_free(t->t_pdmsg, strlen(t->t_pdmsg) + 1);
 745  738                  t->t_pdmsg = NULL;
 746  739          }
 747  740          if (audit_active)
 748  741                  audit_thread_free(t);
 749  742  #ifndef NPROBE
 750  743          if (t->t_tnf_tpdp)
 751  744                  tnf_thread_free(t);
 752  745  #endif /* NPROBE */
 753  746          if (t->t_cldata) {
 754  747                  CL_EXITCLASS(t->t_cid, (caddr_t *)t->t_cldata);
 755  748          }
 756  749          if (t->t_rprof != NULL) {
 757  750                  kmem_free(t->t_rprof, sizeof (*t->t_rprof));
 758  751                  t->t_rprof = NULL;
 759  752          }
 760  753          t->t_lockp = NULL;      /* nothing should try to lock this thread now */
 761  754          if (lwp)
 762  755                  lwp_freeregs(lwp, 0);
 763  756          if (t->t_ctx)
 764  757                  freectx(t, 0);
 765  758          t->t_stk = NULL;
 766  759          if (lwp)
 767  760                  lwp_stk_fini(lwp);
 768  761          lock_clear(&t->t_lock);
 769  762  
 770  763          if (t->t_ts->ts_waiters > 0)
 771  764                  panic("thread_free: turnstile still active");
 772  765  
 773  766          kmem_cache_free(turnstile_cache, t->t_ts);
 774  767  
 775  768          free_afd(&t->t_activefd);
 776  769  
 777  770          /*
 778  771           * Barrier for the tick accounting code.  The tick accounting code
 779  772           * holds this lock to keep the thread from going away while it's
 780  773           * looking at it.
 781  774           */
 782  775          thread_free_barrier(t);
 783  776  
 784  777          ASSERT(ttoproj(t) == proj0p);
 785  778          project_rele(ttoproj(t));
 786  779  
 787  780          lgrp_affinity_free(&t->t_lgrp_affinity);
 788  781  
 789  782          mutex_enter(&pidlock);
 790  783          nthread--;
 791  784          mutex_exit(&pidlock);
 792  785  
 793  786          /*
 794  787           * Free thread, lwp and stack.  This needs to be done carefully, since
 795  788           * if T_TALLOCSTK is set, the thread is part of the stack.
 796  789           */
 797  790          t->t_lwp = NULL;
 798  791          t->t_swap = NULL;
 799  792  
 800  793          if (swap) {
 801  794                  segkp_release(segkp, swap);
 802  795          }
 803  796          if (lwp) {
 804  797                  kmem_cache_free(lwp_cache, lwp);
 805  798          }
 806  799          if (!allocstk) {
 807  800                  kmem_cache_free(thread_cache, t);
 808  801          }
 809  802  }
 810  803  
 811  804  /*
 812  805   * Removes threads associated with the given zone from a deathrow queue.
 813  806   * tp is a pointer to the head of the deathrow queue, and countp is a
 814  807   * pointer to the current deathrow count.  Returns a linked list of
 815  808   * threads removed from the list.
 816  809   */
 817  810  static kthread_t *
 818  811  thread_zone_cleanup(kthread_t **tp, int *countp, zoneid_t zoneid)
 819  812  {
 820  813          kthread_t *tmp, *list = NULL;
 821  814          cred_t *cr;
 822  815  
 823  816          ASSERT(MUTEX_HELD(&reaplock));
 824  817          while (*tp != NULL) {
 825  818                  if ((cr = (*tp)->t_cred) != NULL && crgetzoneid(cr) == zoneid) {
 826  819                          tmp = *tp;
 827  820                          *tp = tmp->t_forw;
 828  821                          tmp->t_forw = list;
 829  822                          list = tmp;
 830  823                          (*countp)--;
 831  824                  } else {
 832  825                          tp = &(*tp)->t_forw;
 833  826                  }
 834  827          }
 835  828          return (list);
 836  829  }
 837  830  
 838  831  static void
 839  832  thread_reap_list(kthread_t *t)
 840  833  {
 841  834          kthread_t *next;
 842  835  
 843  836          while (t != NULL) {
 844  837                  next = t->t_forw;
 845  838                  thread_free(t);
 846  839                  t = next;
 847  840          }
 848  841  }
 849  842  
 850  843  /* ARGSUSED */
 851  844  static void
 852  845  thread_zone_destroy(zoneid_t zoneid, void *unused)
 853  846  {
 854  847          kthread_t *t, *l;
 855  848  
 856  849          mutex_enter(&reaplock);
 857  850          /*
 858  851           * Pull threads and lwps associated with zone off deathrow lists.
 859  852           */
 860  853          t = thread_zone_cleanup(&thread_deathrow, &thread_reapcnt, zoneid);
 861  854          l = thread_zone_cleanup(&lwp_deathrow, &lwp_reapcnt, zoneid);
 862  855          mutex_exit(&reaplock);
 863  856  
 864  857          /*
 865  858           * Guard against race condition in mutex_owner_running:
 866  859           *      thread=owner(mutex)
 867  860           *      <interrupt>
 868  861           *                              thread exits mutex
 869  862           *                              thread exits
 870  863           *                              thread reaped
 871  864           *                              thread struct freed
 872  865           * cpu = thread->t_cpu <- BAD POINTER DEREFERENCE.
 873  866           * A cross call to all cpus will cause the interrupt handler
 874  867           * to reset the PC if it is in mutex_owner_running, refreshing
 875  868           * stale thread pointers.
 876  869           */
 877  870          mutex_sync();   /* sync with mutex code */
 878  871  
 879  872          /*
 880  873           * Reap threads
 881  874           */
 882  875          thread_reap_list(t);
 883  876

↓ open down ↓

543 lines elided

↑ open up ↑

 884  877          /*
 885  878           * Reap lwps
 886  879           */
 887  880          thread_reap_list(l);
 888  881  }
 889  882  
 890  883  /*
 891  884   * cleanup zombie threads that are on deathrow.
 892  885   */
 893  886  void
 894      -thread_reaper()
      887 +thread_reaper(void)
 895  888  {
 896  889          kthread_t *t, *l;
 897  890          callb_cpr_t cprinfo;
 898  891  
 899  892          /*
 900  893           * Register callback to clean up threads when zone is destroyed.
 901  894           */
 902  895          zone_key_create(&zone_thread_key, NULL, NULL, thread_zone_destroy);
 903  896  
 904  897          CALLB_CPR_INIT(&cprinfo, &reaplock, callb_generic_cpr, "t_reaper");

 905  898          for (;;) {
 906  899                  mutex_enter(&reaplock);
 907  900                  while (thread_deathrow == NULL && lwp_deathrow == NULL) {
 908  901                          CALLB_CPR_SAFE_BEGIN(&cprinfo);
 909  902                          cv_wait(&reaper_cv, &reaplock);
 910  903                          CALLB_CPR_SAFE_END(&cprinfo, &reaplock);
 911  904                  }
 912  905                  /*
 913  906                   * mutex_sync() needs to be called when reaping, but
 914  907                   * not too often.  We limit reaping rate to once
 915  908                   * per second.  Reaplimit is max rate at which threads can
 916  909                   * be freed. Does not impact thread destruction/creation.
 917  910                   */
 918  911                  t = thread_deathrow;
 919  912                  l = lwp_deathrow;
 920  913                  thread_deathrow = NULL;
 921  914                  lwp_deathrow = NULL;
 922  915                  thread_reapcnt = 0;
 923  916                  lwp_reapcnt = 0;
 924  917                  mutex_exit(&reaplock);
 925  918  
 926  919                  /*
 927  920                   * Guard against race condition in mutex_owner_running:
 928  921                   *      thread=owner(mutex)
 929  922                   *      <interrupt>
 930  923                   *                              thread exits mutex
 931  924                   *                              thread exits
 932  925                   *                              thread reaped
 933  926                   *                              thread struct freed
 934  927                   * cpu = thread->t_cpu <- BAD POINTER DEREFERENCE.
 935  928                   * A cross call to all cpus will cause the interrupt handler
 936  929                   * to reset the PC if it is in mutex_owner_running, refreshing
 937  930                   * stale thread pointers.
 938  931                   */
 939  932                  mutex_sync();   /* sync with mutex code */
 940  933                  /*
 941  934                   * Reap threads
 942  935                   */
 943  936                  thread_reap_list(t);
 944  937  
 945  938                  /*
 946  939                   * Reap lwps
 947  940                   */
 948  941                  thread_reap_list(l);
 949  942                  delay(hz);
 950  943          }
 951  944  }
 952  945  
 953  946  /*
 954  947   * This is called by lwp_create(), etc. to move a lwp_deathrow thread onto
 955  948   * thread_deathrow. The thread's state has already been changed to TS_FREE
 956  949   * to indicate that it is reapable. The caller already holds the reaplock,
 957  950   * and the thread was already freed.
 958  951   */
 959  952  void
 960  953  reapq_move_lq_to_tq(kthread_t *t)
 961  954  {
 962  955          ASSERT(t->t_state == TS_FREE);
 963  956          ASSERT(MUTEX_HELD(&reaplock));
 964  957          t->t_forw = thread_deathrow;
 965  958          thread_deathrow = t;
 966  959          thread_reapcnt++;
 967  960          if (lwp_reapcnt + thread_reapcnt > reaplimit)
 968  961                  cv_signal(&reaper_cv);  /* wake the reaper */
 969  962  }
 970  963  
 971  964  /*
 972  965   * This is called by resume() to put a zombie thread onto deathrow.
 973  966   * The thread's state is changed to TS_FREE to indicate that it is reapable.
 974  967   * This is called from the idle thread so it must not block - just spin.
 975  968   */
 976  969  void
 977  970  reapq_add(kthread_t *t)
 978  971  {
 979  972          mutex_enter(&reaplock);
 980  973  
 981  974          /*
 982  975           * lwp_deathrow contains threads with lwp linkage and
 983  976           * swappable thread stacks which have the default stacksize.
 984  977           * These threads' lwps and stacks may be reused by lwp_create().
 985  978           *
 986  979           * Anything else goes on thread_deathrow, where it will eventually
 987  980           * be thread_free()d.
 988  981           */
 989  982          if (t->t_flag & T_LWPREUSE) {
 990  983                  ASSERT(ttolwp(t) != NULL);
 991  984                  t->t_forw = lwp_deathrow;
 992  985                  lwp_deathrow = t;
 993  986                  lwp_reapcnt++;
 994  987          } else {
 995  988                  t->t_forw = thread_deathrow;
 996  989                  thread_deathrow = t;
 997  990                  thread_reapcnt++;
 998  991          }
 999  992          if (lwp_reapcnt + thread_reapcnt > reaplimit)
1000  993                  cv_signal(&reaper_cv);  /* wake the reaper */
1001  994          t->t_state = TS_FREE;
1002  995          lock_clear(&t->t_lock);
1003  996  
1004  997          /*
1005  998           * Before we return, we need to grab and drop the thread lock for
1006  999           * the dead thread.  At this point, the current thread is the idle
1007 1000           * thread, and the dead thread's CPU lock points to the current
1008 1001           * CPU -- and we must grab and drop the lock to synchronize with
1009 1002           * a racing thread walking a blocking chain that the zombie thread
1010 1003           * was recently in.  By this point, that blocking chain is (by
1011 1004           * definition) stale:  the dead thread is not holding any locks, and
1012 1005           * is therefore not in any blocking chains -- but if we do not regrab
1013 1006           * our lock before freeing the dead thread's data structures, the
1014 1007           * thread walking the (stale) blocking chain will die on memory
1015 1008           * corruption when it attempts to drop the dead thread's lock.  We
1016 1009           * only need do this once because there is no way for the dead thread
1017 1010           * to ever again be on a blocking chain:  once we have grabbed and
1018 1011           * dropped the thread lock, we are guaranteed that anyone that could
1019 1012           * have seen this thread in a blocking chain can no longer see it.
1020 1013           */

↓ open down ↓

116 lines elided

↑ open up ↑

1021 1014          thread_lock(t);
1022 1015          thread_unlock(t);
1023 1016  
1024 1017          mutex_exit(&reaplock);
1025 1018  }
1026 1019  
1027 1020  /*
1028 1021   * Install thread context ops for the current thread.
1029 1022   */
1030 1023  void
1031      -installctx(
1032      -        kthread_t *t,
1033      -        void    *arg,
1034      -        void    (*save)(void *),
1035      -        void    (*restore)(void *),
1036      -        void    (*fork)(void *, void *),
1037      -        void    (*lwp_create)(void *, void *),
1038      -        void    (*exit)(void *),
1039      -        void    (*free)(void *, int))
     1024 +installctx(kthread_t *t, void *arg, void (*save)(void *),
     1025 +    void (*restore)(void *), void (*fork)(void *, void *),
     1026 +    void (*lwp_create)(void *, void *), void (*exit)(void *),
     1027 +    void (*free)(void *, int))
1040 1028  {
1041 1029          struct ctxop *ctx;
1042 1030  
1043 1031          ctx = kmem_alloc(sizeof (struct ctxop), KM_SLEEP);
1044 1032          ctx->save_op = save;
1045 1033          ctx->restore_op = restore;
1046 1034          ctx->fork_op = fork;
1047 1035          ctx->lwp_create_op = lwp_create;
1048 1036          ctx->exit_op = exit;
1049 1037          ctx->free_op = free;
1050 1038          ctx->arg = arg;
1051 1039          ctx->next = t->t_ctx;
1052 1040          t->t_ctx = ctx;
1053 1041  }
1054 1042  
1055 1043  /*
1056 1044   * Remove the thread context ops from a thread.
1057 1045   */
1058 1046  int
1059      -removectx(
1060      -        kthread_t *t,
1061      -        void    *arg,
1062      -        void    (*save)(void *),
1063      -        void    (*restore)(void *),
1064      -        void    (*fork)(void *, void *),
1065      -        void    (*lwp_create)(void *, void *),
1066      -        void    (*exit)(void *),
1067      -        void    (*free)(void *, int))
     1047 +removectx(kthread_t *t, void *arg, void (*save)(void *),
     1048 +    void (*restore)(void *), void (*fork)(void *, void *),
     1049 +    void (*lwp_create)(void *, void *), void (*exit)(void *),
     1050 +    void (*free)(void *, int))
1068 1051  {
1069 1052          struct ctxop *ctx, *prev_ctx;
1070 1053  
1071 1054          /*
1072 1055           * The incoming kthread_t (which is the thread for which the
1073 1056           * context ops will be removed) should be one of the following:
1074 1057           *
1075 1058           * a) the current thread,
1076 1059           *
1077 1060           * b) a thread of a process that's being forked (SIDL),

1078 1061           *
1079 1062           * c) a thread that belongs to the same process as the current
1080 1063           *    thread and for which the current thread is the agent thread,
1081 1064           *
1082 1065           * d) a thread that is TS_STOPPED which is indicative of it
1083 1066           *    being (if curthread is not an agent) a thread being created
1084 1067           *    as part of an lwp creation.
1085 1068           */
1086 1069          ASSERT(t == curthread || ttoproc(t)->p_stat == SIDL ||
1087 1070              ttoproc(t)->p_agenttp == curthread || t->t_state == TS_STOPPED);
1088 1071  
1089 1072          /*
1090 1073           * Serialize modifications to t->t_ctx to prevent the agent thread
1091 1074           * and the target thread from racing with each other during lwp exit.
1092 1075           */
1093 1076          mutex_enter(&t->t_ctx_lock);
1094 1077          prev_ctx = NULL;
1095 1078          for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next) {
1096 1079                  if (ctx->save_op == save && ctx->restore_op == restore &&
1097 1080                      ctx->fork_op == fork && ctx->lwp_create_op == lwp_create &&
1098 1081                      ctx->exit_op == exit && ctx->free_op == free &&
1099 1082                      ctx->arg == arg) {
1100 1083                          if (prev_ctx)
1101 1084                                  prev_ctx->next = ctx->next;
1102 1085                          else
1103 1086                                  t->t_ctx = ctx->next;
1104 1087                          mutex_exit(&t->t_ctx_lock);
1105 1088                          if (ctx->free_op != NULL)
1106 1089                                  (ctx->free_op)(ctx->arg, 0);
1107 1090                          kmem_free(ctx, sizeof (struct ctxop));
1108 1091                          return (1);
1109 1092                  }
1110 1093                  prev_ctx = ctx;
1111 1094          }
1112 1095          mutex_exit(&t->t_ctx_lock);
1113 1096  
1114 1097          return (0);
1115 1098  }
1116 1099  
1117 1100  void
1118 1101  savectx(kthread_t *t)
1119 1102  {
1120 1103          struct ctxop *ctx;
1121 1104  
1122 1105          ASSERT(t == curthread);
1123 1106          for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
1124 1107                  if (ctx->save_op != NULL)
1125 1108                          (ctx->save_op)(ctx->arg);
1126 1109  }
1127 1110  
1128 1111  void
1129 1112  restorectx(kthread_t *t)
1130 1113  {
1131 1114          struct ctxop *ctx;
1132 1115  
1133 1116          ASSERT(t == curthread);
1134 1117          for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
1135 1118                  if (ctx->restore_op != NULL)
1136 1119                          (ctx->restore_op)(ctx->arg);
1137 1120  }
1138 1121  
1139 1122  void
1140 1123  forkctx(kthread_t *t, kthread_t *ct)
1141 1124  {
1142 1125          struct ctxop *ctx;
1143 1126  
1144 1127          for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
1145 1128                  if (ctx->fork_op != NULL)
1146 1129                          (ctx->fork_op)(t, ct);
1147 1130  }
1148 1131  
1149 1132  /*
1150 1133   * Note that this operator is only invoked via the _lwp_create
1151 1134   * system call.  The system may have other reasons to create lwps
1152 1135   * e.g. the agent lwp or the doors unreferenced lwp.
1153 1136   */
1154 1137  void
1155 1138  lwp_createctx(kthread_t *t, kthread_t *ct)
1156 1139  {
1157 1140          struct ctxop *ctx;
1158 1141  
1159 1142          for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
1160 1143                  if (ctx->lwp_create_op != NULL)
1161 1144                          (ctx->lwp_create_op)(t, ct);
1162 1145  }
1163 1146  
1164 1147  /*
1165 1148   * exitctx is called from thread_exit() and lwp_exit() to perform any actions
1166 1149   * needed when the thread/LWP leaves the processor for the last time. This
1167 1150   * routine is not intended to deal with freeing memory; freectx() is used for
1168 1151   * that purpose during thread_free(). This routine is provided to allow for
1169 1152   * clean-up that can't wait until thread_free().
1170 1153   */
1171 1154  void
1172 1155  exitctx(kthread_t *t)
1173 1156  {
1174 1157          struct ctxop *ctx;
1175 1158  
1176 1159          for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
1177 1160                  if (ctx->exit_op != NULL)
1178 1161                          (ctx->exit_op)(t);
1179 1162  }
1180 1163  
1181 1164  /*
1182 1165   * freectx is called from thread_free() and exec() to get
1183 1166   * rid of old thread context ops.
1184 1167   */
1185 1168  void
1186 1169  freectx(kthread_t *t, int isexec)
1187 1170  {
1188 1171          struct ctxop *ctx;
1189 1172  
1190 1173          while ((ctx = t->t_ctx) != NULL) {
1191 1174                  t->t_ctx = ctx->next;
1192 1175                  if (ctx->free_op != NULL)
1193 1176                          (ctx->free_op)(ctx->arg, isexec);
1194 1177                  kmem_free(ctx, sizeof (struct ctxop));
1195 1178          }
1196 1179  }
1197 1180  
1198 1181  /*
1199 1182   * freectx_ctx is called from lwp_create() when lwp is reused from
1200 1183   * lwp_deathrow and its thread structure is added to thread_deathrow.
1201 1184   * The thread structure to which this ctx was attached may be already
1202 1185   * freed by the thread reaper so free_op implementations shouldn't rely
1203 1186   * on thread structure to which this ctx was attached still being around.
1204 1187   */
1205 1188  void
1206 1189  freectx_ctx(struct ctxop *ctx)
1207 1190  {
1208 1191          struct ctxop *nctx;
1209 1192  
1210 1193          ASSERT(ctx != NULL);
1211 1194  
1212 1195          do {
1213 1196                  nctx = ctx->next;
1214 1197                  if (ctx->free_op != NULL)
1215 1198                          (ctx->free_op)(ctx->arg, 0);
1216 1199                  kmem_free(ctx, sizeof (struct ctxop));
1217 1200          } while ((ctx = nctx) != NULL);
1218 1201  }
1219 1202  
1220 1203  /*
1221 1204   * Set the thread running; arrange for it to be swapped in if necessary.
1222 1205   */
1223 1206  void
1224 1207  setrun_locked(kthread_t *t)
1225 1208  {
1226 1209          ASSERT(THREAD_LOCK_HELD(t));
1227 1210          if (t->t_state == TS_SLEEP) {
1228 1211                  /*
1229 1212                   * Take off sleep queue.
1230 1213                   */
1231 1214                  SOBJ_UNSLEEP(t->t_sobj_ops, t);
1232 1215          } else if (t->t_state & (TS_RUN | TS_ONPROC)) {
1233 1216                  /*
1234 1217                   * Already on dispatcher queue.
1235 1218                   */
1236 1219                  return;
1237 1220          } else if (t->t_state == TS_WAIT) {
1238 1221                  waitq_setrun(t);
1239 1222          } else if (t->t_state == TS_STOPPED) {
1240 1223                  /*
1241 1224                   * All of the sending of SIGCONT (TC_XSTART) and /proc
1242 1225                   * (TC_PSTART) and lwp_continue() (TC_CSTART) must have
1243 1226                   * requested that the thread be run.
1244 1227                   * Just calling setrun() is not sufficient to set a stopped
1245 1228                   * thread running.  TP_TXSTART is always set if the thread
1246 1229                   * is not stopped by a jobcontrol stop signal.
1247 1230                   * TP_TPSTART is always set if /proc is not controlling it.
1248 1231                   * TP_TCSTART is always set if lwp_suspend() didn't stop it.
1249 1232                   * The thread won't be stopped unless one of these
1250 1233                   * three mechanisms did it.
1251 1234                   *
1252 1235                   * These flags must be set before calling setrun_locked(t).
1253 1236                   * They can't be passed as arguments because the streams
1254 1237                   * code calls setrun() indirectly and the mechanism for
1255 1238                   * doing so admits only one argument.  Note that the
1256 1239                   * thread must be locked in order to change t_schedflag.
1257 1240                   */
1258 1241                  if ((t->t_schedflag & TS_ALLSTART) != TS_ALLSTART)
1259 1242                          return;
1260 1243                  /*
1261 1244                   * Process is no longer stopped (a thread is running).
1262 1245                   */
1263 1246                  t->t_whystop = 0;
1264 1247                  t->t_whatstop = 0;
1265 1248                  /*
1266 1249                   * Strictly speaking, we do not have to clear these
1267 1250                   * flags here; they are cleared on entry to stop().
1268 1251                   * However, they are confusing when doing kernel
1269 1252                   * debugging or when they are revealed by ps(1).
1270 1253                   */
1271 1254                  t->t_schedflag &= ~TS_ALLSTART;
1272 1255                  THREAD_TRANSITION(t);   /* drop stopped-thread lock */
1273 1256                  ASSERT(t->t_lockp == &transition_lock);
1274 1257                  ASSERT(t->t_wchan0 == NULL && t->t_wchan == NULL);
1275 1258                  /*
1276 1259                   * Let the class put the process on the dispatcher queue.
1277 1260                   */
1278 1261                  CL_SETRUN(t);
1279 1262          }
1280 1263  }
1281 1264  
1282 1265  void
1283 1266  setrun(kthread_t *t)
1284 1267  {
1285 1268          thread_lock(t);
1286 1269          setrun_locked(t);
1287 1270          thread_unlock(t);
1288 1271  }
1289 1272  
1290 1273  /*
1291 1274   * Unpin an interrupted thread.
1292 1275   *      When an interrupt occurs, the interrupt is handled on the stack

↓ open down ↓

215 lines elided

↑ open up ↑

1293 1276   *      of an interrupt thread, taken from a pool linked to the CPU structure.
1294 1277   *
1295 1278   *      When swtch() is switching away from an interrupt thread because it
1296 1279   *      blocked or was preempted, this routine is called to complete the
1297 1280   *      saving of the interrupted thread state, and returns the interrupted
1298 1281   *      thread pointer so it may be resumed.
1299 1282   *
1300 1283   *      Called by swtch() only at high spl.
1301 1284   */
1302 1285  kthread_t *
1303      -thread_unpin()
     1286 +thread_unpin(void)
1304 1287  {
1305 1288          kthread_t       *t = curthread; /* current thread */
1306 1289          kthread_t       *itp;           /* interrupted thread */
1307 1290          int             i;              /* interrupt level */
1308 1291          extern int      intr_passivate();
1309 1292  
1310 1293          ASSERT(t->t_intr != NULL);
1311 1294  
1312 1295          itp = t->t_intr;                /* interrupted thread */
1313 1296          t->t_intr = NULL;               /* clear interrupt ptr */

1314 1297  
1315 1298          /*
1316 1299           * Get state from interrupt thread for the one
1317 1300           * it interrupted.
1318 1301           */
1319 1302  
1320 1303          i = intr_passivate(t, itp);
1321 1304  
1322 1305          TRACE_5(TR_FAC_INTR, TR_INTR_PASSIVATE,
1323 1306              "intr_passivate:level %d curthread %p (%T) ithread %p (%T)",
1324 1307              i, t, t, itp, itp);
1325 1308  
1326 1309          /*
1327 1310           * Dissociate the current thread from the interrupted thread's LWP.
1328 1311           */
1329 1312          t->t_lwp = NULL;
1330 1313  
1331 1314          /*
1332 1315           * Interrupt handlers above the level that spinlocks block must
1333 1316           * not block.
1334 1317           */
1335 1318  #if DEBUG
1336 1319          if (i < 0 || i > LOCK_LEVEL)
1337 1320                  cmn_err(CE_PANIC, "thread_unpin: ipl out of range %x", i);
1338 1321  #endif
1339 1322  
1340 1323          /*

↓ open down ↓

27 lines elided

↑ open up ↑

1341 1324           * Compute the CPU's base interrupt level based on the active
1342 1325           * interrupts.
1343 1326           */
1344 1327          ASSERT(CPU->cpu_intr_actv & (1 << i));
1345 1328          set_base_spl();
1346 1329  
1347 1330          return (itp);
1348 1331  }
1349 1332  
1350 1333  /*
1351      - * Create and initialize an interrupt thread.
1352      - *      Returns non-zero on error.
1353      - *      Called at spl7() or better.
1354      - */
1355      -void
1356      -thread_create_intr(struct cpu *cp)
1357      -{
1358      -        kthread_t *tp;
1359      -
1360      -        tp = thread_create(NULL, 0,
1361      -            (void (*)())thread_create_intr, NULL, 0, &p0, TS_ONPROC, 0);
1362      -
1363      -        /*
1364      -         * Set the thread in the TS_FREE state.  The state will change
1365      -         * to TS_ONPROC only while the interrupt is active.  Think of these
1366      -         * as being on a private free list for the CPU.  Being TS_FREE keeps
1367      -         * inactive interrupt threads out of debugger thread lists.
1368      -         *
1369      -         * We cannot call thread_create with TS_FREE because of the current
1370      -         * checks there for ONPROC.  Fix this when thread_create takes flags.
1371      -         */
1372      -        THREAD_FREEINTR(tp, cp);
1373      -
1374      -        /*
1375      -         * Nobody should ever reference the credentials of an interrupt
1376      -         * thread so make it NULL to catch any such references.
1377      -         */
1378      -        tp->t_cred = NULL;
1379      -        tp->t_flag |= T_INTR_THREAD;
1380      -        tp->t_cpu = cp;
1381      -        tp->t_bound_cpu = cp;
1382      -        tp->t_disp_queue = cp->cpu_disp;
1383      -        tp->t_affinitycnt = 1;
1384      -        tp->t_preempt = 1;
1385      -
1386      -        /*
1387      -         * Don't make a user-requested binding on this thread so that
1388      -         * the processor can be offlined.
1389      -         */
1390      -        tp->t_bind_cpu = PBIND_NONE;    /* no USER-requested binding */
1391      -        tp->t_bind_pset = PS_NONE;
1392      -
1393      -#if defined(__i386) || defined(__amd64)
1394      -        tp->t_stk -= STACK_ALIGN;
1395      -        *(tp->t_stk) = 0;               /* terminate intr thread stack */
1396      -#endif
1397      -
1398      -        /*
1399      -         * Link onto CPU's interrupt pool.
1400      -         */
1401      -        tp->t_link = cp->cpu_intr_thread;
1402      -        cp->cpu_intr_thread = tp;
1403      -}
1404      -
1405      -/*
1406 1334   * TSD -- THREAD SPECIFIC DATA
1407 1335   */
1408 1336  static kmutex_t         tsd_mutex;       /* linked list spin lock */
1409 1337  static uint_t           tsd_nkeys;       /* size of destructor array */
1410 1338  /* per-key destructor funcs */
1411 1339  static void             (**tsd_destructor)(void *);
1412 1340  /* list of tsd_thread's */
1413 1341  static struct tsd_thread        *tsd_list;
1414 1342  
1415 1343  /*

1416 1344   * Default destructor
1417 1345   *      Needed because NULL destructor means that the key is unused
1418 1346   */
1419 1347  /* ARGSUSED */
1420 1348  void
1421 1349  tsd_defaultdestructor(void *value)
1422 1350  {}
1423 1351  
1424 1352  /*
1425 1353   * Create a key (index into per thread array)
1426 1354   *      Locks out tsd_create, tsd_destroy, and tsd_exit
1427 1355   *      May allocate memory with lock held
1428 1356   */
1429 1357  void
1430 1358  tsd_create(uint_t *keyp, void (*destructor)(void *))
1431 1359  {
1432 1360          int     i;
1433 1361          uint_t  nkeys;
1434 1362  
1435 1363          /*
1436 1364           * if key is allocated, do nothing
1437 1365           */
1438 1366          mutex_enter(&tsd_mutex);
1439 1367          if (*keyp) {
1440 1368                  mutex_exit(&tsd_mutex);
1441 1369                  return;
1442 1370          }
1443 1371          /*
1444 1372           * find an unused key
1445 1373           */
1446 1374          if (destructor == NULL)
1447 1375                  destructor = tsd_defaultdestructor;
1448 1376  
1449 1377          for (i = 0; i < tsd_nkeys; ++i)
1450 1378                  if (tsd_destructor[i] == NULL)
1451 1379                          break;
1452 1380  
1453 1381          /*
1454 1382           * if no unused keys, increase the size of the destructor array
1455 1383           */
1456 1384          if (i == tsd_nkeys) {
1457 1385                  if ((nkeys = (tsd_nkeys << 1)) == 0)
1458 1386                          nkeys = 1;
1459 1387                  tsd_destructor =
1460 1388                      (void (**)(void *))tsd_realloc((void *)tsd_destructor,
1461 1389                      (size_t)(tsd_nkeys * sizeof (void (*)(void *))),
1462 1390                      (size_t)(nkeys * sizeof (void (*)(void *))));
1463 1391                  tsd_nkeys = nkeys;
1464 1392          }
1465 1393  
1466 1394          /*
1467 1395           * allocate the next available unused key
1468 1396           */
1469 1397          tsd_destructor[i] = destructor;
1470 1398          *keyp = i + 1;
1471 1399          mutex_exit(&tsd_mutex);
1472 1400  }
1473 1401  
1474 1402  /*
1475 1403   * Destroy a key -- this is for unloadable modules
1476 1404   *
1477 1405   * Assumes that the caller is preventing tsd_set and tsd_get
1478 1406   * Locks out tsd_create, tsd_destroy, and tsd_exit
1479 1407   * May free memory with lock held
1480 1408   */
1481 1409  void
1482 1410  tsd_destroy(uint_t *keyp)
1483 1411  {
1484 1412          uint_t key;
1485 1413          struct tsd_thread *tsd;
1486 1414  
1487 1415          /*
1488 1416           * protect the key namespace and our destructor lists
1489 1417           */
1490 1418          mutex_enter(&tsd_mutex);
1491 1419          key = *keyp;
1492 1420          *keyp = 0;
1493 1421  
1494 1422          ASSERT(key <= tsd_nkeys);
1495 1423  
1496 1424          /*
1497 1425           * if the key is valid
1498 1426           */
1499 1427          if (key != 0) {
1500 1428                  uint_t k = key - 1;
1501 1429                  /*
1502 1430                   * for every thread with TSD, call key's destructor
1503 1431                   */
1504 1432                  for (tsd = tsd_list; tsd; tsd = tsd->ts_next) {
1505 1433                          /*
1506 1434                           * no TSD for key in this thread
1507 1435                           */
1508 1436                          if (key > tsd->ts_nkeys)
1509 1437                                  continue;
1510 1438                          /*
1511 1439                           * call destructor for key
1512 1440                           */
1513 1441                          if (tsd->ts_value[k] && tsd_destructor[k])
1514 1442                                  (*tsd_destructor[k])(tsd->ts_value[k]);
1515 1443                          /*
1516 1444                           * reset value for key
1517 1445                           */
1518 1446                          tsd->ts_value[k] = NULL;
1519 1447                  }
1520 1448                  /*
1521 1449                   * actually free the key (NULL destructor == unused)
1522 1450                   */
1523 1451                  tsd_destructor[k] = NULL;
1524 1452          }
1525 1453  
1526 1454          mutex_exit(&tsd_mutex);
1527 1455  }
1528 1456  
1529 1457  /*
1530 1458   * Quickly return the per thread value that was stored with the specified key
1531 1459   * Assumes the caller is protecting key from tsd_create and tsd_destroy
1532 1460   */
1533 1461  void *
1534 1462  tsd_get(uint_t key)
1535 1463  {
1536 1464          return (tsd_agent_get(curthread, key));
1537 1465  }
1538 1466  
1539 1467  /*
1540 1468   * Set a per thread value indexed with the specified key
1541 1469   */
1542 1470  int
1543 1471  tsd_set(uint_t key, void *value)
1544 1472  {
1545 1473          return (tsd_agent_set(curthread, key, value));
1546 1474  }
1547 1475  
1548 1476  /*
1549 1477   * Like tsd_get(), except that the agent lwp can get the tsd of
1550 1478   * another thread in the same process (the agent thread only runs when the
1551 1479   * process is completely stopped by /proc), or syslwp is creating a new lwp.
1552 1480   */
1553 1481  void *
1554 1482  tsd_agent_get(kthread_t *t, uint_t key)
1555 1483  {
1556 1484          struct tsd_thread *tsd = t->t_tsd;
1557 1485  
1558 1486          ASSERT(t == curthread ||
1559 1487              ttoproc(t)->p_agenttp == curthread || t->t_state == TS_STOPPED);
1560 1488  
1561 1489          if (key && tsd != NULL && key <= tsd->ts_nkeys)
1562 1490                  return (tsd->ts_value[key - 1]);
1563 1491          return (NULL);
1564 1492  }
1565 1493  
1566 1494  /*
1567 1495   * Like tsd_set(), except that the agent lwp can set the tsd of
1568 1496   * another thread in the same process, or syslwp can set the tsd
1569 1497   * of a thread it's in the middle of creating.
1570 1498   *
1571 1499   * Assumes the caller is protecting key from tsd_create and tsd_destroy
1572 1500   * May lock out tsd_destroy (and tsd_create), may allocate memory with
1573 1501   * lock held
1574 1502   */
1575 1503  int
1576 1504  tsd_agent_set(kthread_t *t, uint_t key, void *value)
1577 1505  {
1578 1506          struct tsd_thread *tsd = t->t_tsd;
1579 1507  
1580 1508          ASSERT(t == curthread ||
1581 1509              ttoproc(t)->p_agenttp == curthread || t->t_state == TS_STOPPED);
1582 1510  
1583 1511          if (key == 0)
1584 1512                  return (EINVAL);
1585 1513          if (tsd == NULL)
1586 1514                  tsd = t->t_tsd = kmem_zalloc(sizeof (*tsd), KM_SLEEP);
1587 1515          if (key <= tsd->ts_nkeys) {
1588 1516                  tsd->ts_value[key - 1] = value;
1589 1517                  return (0);
1590 1518          }
1591 1519  
1592 1520          ASSERT(key <= tsd_nkeys);
1593 1521  
1594 1522          /*
1595 1523           * lock out tsd_destroy()
1596 1524           */
1597 1525          mutex_enter(&tsd_mutex);
1598 1526          if (tsd->ts_nkeys == 0) {
1599 1527                  /*
1600 1528                   * Link onto list of threads with TSD
1601 1529                   */
1602 1530                  if ((tsd->ts_next = tsd_list) != NULL)
1603 1531                          tsd_list->ts_prev = tsd;
1604 1532                  tsd_list = tsd;
1605 1533          }
1606 1534  
1607 1535          /*
1608 1536           * Allocate thread local storage and set the value for key
1609 1537           */
1610 1538          tsd->ts_value = tsd_realloc(tsd->ts_value,
1611 1539              tsd->ts_nkeys * sizeof (void *),
1612 1540              key * sizeof (void *));
1613 1541          tsd->ts_nkeys = key;
1614 1542          tsd->ts_value[key - 1] = value;
1615 1543          mutex_exit(&tsd_mutex);
1616 1544  
1617 1545          return (0);
1618 1546  }
1619 1547  
1620 1548  
1621 1549  /*
1622 1550   * Return the per thread value that was stored with the specified key
1623 1551   *      If necessary, create the key and the value
1624 1552   *      Assumes the caller is protecting *keyp from tsd_destroy
1625 1553   */
1626 1554  void *
1627 1555  tsd_getcreate(uint_t *keyp, void (*destroy)(void *), void *(*allocate)(void))
1628 1556  {
1629 1557          void *value;
1630 1558          uint_t key = *keyp;
1631 1559          struct tsd_thread *tsd = curthread->t_tsd;
1632 1560  
1633 1561          if (tsd == NULL)
1634 1562                  tsd = curthread->t_tsd = kmem_zalloc(sizeof (*tsd), KM_SLEEP);
1635 1563          if (key && key <= tsd->ts_nkeys && (value = tsd->ts_value[key - 1]))
1636 1564                  return (value);
1637 1565          if (key == 0)
1638 1566                  tsd_create(keyp, destroy);
1639 1567          (void) tsd_set(*keyp, value = (*allocate)());
1640 1568  
1641 1569          return (value);
1642 1570  }
1643 1571  
1644 1572  /*
1645 1573   * Called from thread_exit() to run the destructor function for each tsd
1646 1574   *      Locks out tsd_create and tsd_destroy
1647 1575   *      Assumes that the destructor *DOES NOT* use tsd
1648 1576   */
1649 1577  void
1650 1578  tsd_exit(void)
1651 1579  {
1652 1580          int i;
1653 1581          struct tsd_thread *tsd = curthread->t_tsd;
1654 1582  
1655 1583          if (tsd == NULL)
1656 1584                  return;
1657 1585  
1658 1586          if (tsd->ts_nkeys == 0) {
1659 1587                  kmem_free(tsd, sizeof (*tsd));
1660 1588                  curthread->t_tsd = NULL;
1661 1589                  return;
1662 1590          }
1663 1591  
1664 1592          /*
1665 1593           * lock out tsd_create and tsd_destroy, call
1666 1594           * the destructor, and mark the value as destroyed.
1667 1595           */
1668 1596          mutex_enter(&tsd_mutex);
1669 1597  
1670 1598          for (i = 0; i < tsd->ts_nkeys; i++) {
1671 1599                  if (tsd->ts_value[i] && tsd_destructor[i])
1672 1600                          (*tsd_destructor[i])(tsd->ts_value[i]);
1673 1601                  tsd->ts_value[i] = NULL;
1674 1602          }
1675 1603  
1676 1604          /*
1677 1605           * remove from linked list of threads with TSD
1678 1606           */
1679 1607          if (tsd->ts_next)
1680 1608                  tsd->ts_next->ts_prev = tsd->ts_prev;
1681 1609          if (tsd->ts_prev)
1682 1610                  tsd->ts_prev->ts_next = tsd->ts_next;
1683 1611          if (tsd_list == tsd)
1684 1612                  tsd_list = tsd->ts_next;
1685 1613  
1686 1614          mutex_exit(&tsd_mutex);
1687 1615  
1688 1616          /*
1689 1617           * free up the TSD
1690 1618           */
1691 1619          kmem_free(tsd->ts_value, tsd->ts_nkeys * sizeof (void *));
1692 1620          kmem_free(tsd, sizeof (struct tsd_thread));
1693 1621          curthread->t_tsd = NULL;
1694 1622  }
1695 1623  
1696 1624  /*
1697 1625   * realloc
1698 1626   */
1699 1627  static void *
1700 1628  tsd_realloc(void *old, size_t osize, size_t nsize)
1701 1629  {
1702 1630          void *new;
1703 1631  
1704 1632          new = kmem_zalloc(nsize, KM_SLEEP);
1705 1633          if (old) {
1706 1634                  bcopy(old, new, osize);
1707 1635                  kmem_free(old, osize);
1708 1636          }
1709 1637          return (new);
1710 1638  }
1711 1639  
1712 1640  /*
1713 1641   * Return non-zero if an interrupt is being serviced.
1714 1642   */
1715 1643  int
1716 1644  servicing_interrupt()
1717 1645  {
1718 1646          int onintr = 0;
1719 1647  
1720 1648          /* Are we an interrupt thread */
1721 1649          if (curthread->t_flag & T_INTR_THREAD)
1722 1650                  return (1);
1723 1651          /* Are we servicing a high level interrupt? */
1724 1652          if (CPU_ON_INTR(CPU)) {
1725 1653                  kpreempt_disable();
1726 1654                  onintr = CPU_ON_INTR(CPU);
1727 1655                  kpreempt_enable();
1728 1656          }
1729 1657          return (onintr);
1730 1658  }
1731 1659  
1732 1660  
1733 1661  /*
1734 1662   * Change the dispatch priority of a thread in the system.
1735 1663   * Used when raising or lowering a thread's priority.
1736 1664   * (E.g., priority inheritance)
1737 1665   *
1738 1666   * Since threads are queued according to their priority, we
1739 1667   * we must check the thread's state to determine whether it
1740 1668   * is on a queue somewhere. If it is, we've got to:
1741 1669   *
1742 1670   *      o Dequeue the thread.
1743 1671   *      o Change its effective priority.
1744 1672   *      o Enqueue the thread.
1745 1673   *
1746 1674   * Assumptions: The thread whose priority we wish to change
1747 1675   * must be locked before we call thread_change_(e)pri().
1748 1676   * The thread_change(e)pri() function doesn't drop the thread
1749 1677   * lock--that must be done by its caller.
1750 1678   */
1751 1679  void
1752 1680  thread_change_epri(kthread_t *t, pri_t disp_pri)
1753 1681  {
1754 1682          uint_t  state;
1755 1683  
1756 1684          ASSERT(THREAD_LOCK_HELD(t));
1757 1685  
1758 1686          /*
1759 1687           * If the inherited priority hasn't actually changed,
1760 1688           * just return.
1761 1689           */
1762 1690          if (t->t_epri == disp_pri)
1763 1691                  return;
1764 1692  
1765 1693          state = t->t_state;
1766 1694  
1767 1695          /*
1768 1696           * If it's not on a queue, change the priority with impunity.
1769 1697           */
1770 1698          if ((state & (TS_SLEEP | TS_RUN | TS_WAIT)) == 0) {
1771 1699                  t->t_epri = disp_pri;
1772 1700                  if (state == TS_ONPROC) {
1773 1701                          cpu_t *cp = t->t_disp_queue->disp_cpu;
1774 1702  
1775 1703                          if (t == cp->cpu_dispthread)
1776 1704                                  cp->cpu_dispatch_pri = DISP_PRIO(t);
1777 1705                  }
1778 1706          } else if (state == TS_SLEEP) {
1779 1707                  /*
1780 1708                   * Take the thread out of its sleep queue.
1781 1709                   * Change the inherited priority.
1782 1710                   * Re-enqueue the thread.
1783 1711                   * Each synchronization object exports a function
1784 1712                   * to do this in an appropriate manner.
1785 1713                   */
1786 1714                  SOBJ_CHANGE_EPRI(t->t_sobj_ops, t, disp_pri);
1787 1715          } else if (state == TS_WAIT) {
1788 1716                  /*
1789 1717                   * Re-enqueue a thread on the wait queue if its
1790 1718                   * effective priority needs to change.
1791 1719                   */
1792 1720                  if (disp_pri != t->t_epri)
1793 1721                          waitq_change_pri(t, disp_pri);
1794 1722          } else {
1795 1723                  /*
1796 1724                   * The thread is on a run queue.
1797 1725                   * Note: setbackdq() may not put the thread
1798 1726                   * back on the same run queue where it originally
1799 1727                   * resided.
1800 1728                   */
1801 1729                  (void) dispdeq(t);
1802 1730                  t->t_epri = disp_pri;
1803 1731                  setbackdq(t);
1804 1732          }
1805 1733          schedctl_set_cidpri(t);
1806 1734  }
1807 1735  
1808 1736  /*
1809 1737   * Function: Change the t_pri field of a thread.
1810 1738   * Side Effects: Adjust the thread ordering on a run queue
1811 1739   *               or sleep queue, if necessary.
1812 1740   * Returns: 1 if the thread was on a run queue, else 0.
1813 1741   */
1814 1742  int
1815 1743  thread_change_pri(kthread_t *t, pri_t disp_pri, int front)
1816 1744  {
1817 1745          uint_t  state;
1818 1746          int     on_rq = 0;
1819 1747  
1820 1748          ASSERT(THREAD_LOCK_HELD(t));
1821 1749  
1822 1750          state = t->t_state;
1823 1751          THREAD_WILLCHANGE_PRI(t, disp_pri);
1824 1752  
1825 1753          /*
1826 1754           * If it's not on a queue, change the priority with impunity.
1827 1755           */
1828 1756          if ((state & (TS_SLEEP | TS_RUN | TS_WAIT)) == 0) {
1829 1757                  t->t_pri = disp_pri;
1830 1758  
1831 1759                  if (state == TS_ONPROC) {
1832 1760                          cpu_t *cp = t->t_disp_queue->disp_cpu;
1833 1761  
1834 1762                          if (t == cp->cpu_dispthread)
1835 1763                                  cp->cpu_dispatch_pri = DISP_PRIO(t);
1836 1764                  }
1837 1765          } else if (state == TS_SLEEP) {
1838 1766                  /*
1839 1767                   * If the priority has changed, take the thread out of
1840 1768                   * its sleep queue and change the priority.
1841 1769                   * Re-enqueue the thread.
1842 1770                   * Each synchronization object exports a function
1843 1771                   * to do this in an appropriate manner.
1844 1772                   */
1845 1773                  if (disp_pri != t->t_pri)
1846 1774                          SOBJ_CHANGE_PRI(t->t_sobj_ops, t, disp_pri);
1847 1775          } else if (state == TS_WAIT) {
1848 1776                  /*
1849 1777                   * Re-enqueue a thread on the wait queue if its
1850 1778                   * priority needs to change.
1851 1779                   */
1852 1780                  if (disp_pri != t->t_pri)
1853 1781                          waitq_change_pri(t, disp_pri);
1854 1782          } else {
1855 1783                  /*
1856 1784                   * The thread is on a run queue.
1857 1785                   * Note: setbackdq() may not put the thread
1858 1786                   * back on the same run queue where it originally
1859 1787                   * resided.
1860 1788                   *
1861 1789                   * We still requeue the thread even if the priority
1862 1790                   * is unchanged to preserve round-robin (and other)
1863 1791                   * effects between threads of the same priority.
1864 1792                   */
1865 1793                  on_rq = dispdeq(t);
1866 1794                  ASSERT(on_rq);
1867 1795                  t->t_pri = disp_pri;
1868 1796                  if (front) {
1869 1797                          setfrontdq(t);
1870 1798                  } else {
1871 1799                          setbackdq(t);
1872 1800                  }
1873 1801          }
1874 1802          schedctl_set_cidpri(t);
1875 1803          return (on_rq);
1876 1804  }
1877 1805  
1878 1806  /*
1879 1807   * Tunable kmem_stackinfo is set, fill the kernel thread stack with a
1880 1808   * specific pattern.
1881 1809   */
1882 1810  static void
1883 1811  stkinfo_begin(kthread_t *t)
1884 1812  {
1885 1813          caddr_t start;  /* stack start */
1886 1814          caddr_t end;    /* stack end  */
1887 1815          uint64_t *ptr;  /* pattern pointer */
1888 1816  
1889 1817          /*
1890 1818           * Stack grows up or down, see thread_create(),
1891 1819           * compute stack memory area start and end (start < end).
1892 1820           */
1893 1821          if (t->t_stk > t->t_stkbase) {
1894 1822                  /* stack grows down */
1895 1823                  start = t->t_stkbase;
1896 1824                  end = t->t_stk;
1897 1825          } else {
1898 1826                  /* stack grows up */
1899 1827                  start = t->t_stk;
1900 1828                  end = t->t_stkbase;
1901 1829          }
1902 1830  
1903 1831          /*
1904 1832           * Stackinfo pattern size is 8 bytes. Ensure proper 8 bytes
1905 1833           * alignement for start and end in stack area boundaries
1906 1834           * (protection against corrupt t_stkbase/t_stk data).
1907 1835           */
1908 1836          if ((((uintptr_t)start) & 0x7) != 0) {
1909 1837                  start = (caddr_t)((((uintptr_t)start) & (~0x7)) + 8);
1910 1838          }
1911 1839          end = (caddr_t)(((uintptr_t)end) & (~0x7));
1912 1840  
1913 1841          if ((end <= start) || (end - start) > (1024 * 1024)) {
1914 1842                  /* negative or stack size > 1 meg, assume bogus */
1915 1843                  return;
1916 1844          }
1917 1845  
1918 1846          /* fill stack area with a pattern (instead of zeros) */
1919 1847          ptr = (uint64_t *)((void *)start);
1920 1848          while (ptr < (uint64_t *)((void *)end)) {
1921 1849                  *ptr++ = KMEM_STKINFO_PATTERN;
1922 1850          }
1923 1851  }
1924 1852  
1925 1853  
1926 1854  /*
1927 1855   * Tunable kmem_stackinfo is set, create stackinfo log if doesn't already exist,
1928 1856   * compute the percentage of kernel stack really used, and set in the log
1929 1857   * if it's the latest highest percentage.
1930 1858   */
1931 1859  static void
1932 1860  stkinfo_end(kthread_t *t)
1933 1861  {
1934 1862          caddr_t start;  /* stack start */
1935 1863          caddr_t end;    /* stack end  */
1936 1864          uint64_t *ptr;  /* pattern pointer */
1937 1865          size_t stksz;   /* stack size */
1938 1866          size_t smallest = 0;
1939 1867          size_t percent = 0;
1940 1868          uint_t index = 0;
1941 1869          uint_t i;
1942 1870          static size_t smallest_percent = (size_t)-1;
1943 1871          static uint_t full = 0;
1944 1872  
1945 1873          /* create the stackinfo log, if doesn't already exist */
1946 1874          mutex_enter(&kmem_stkinfo_lock);
1947 1875          if (kmem_stkinfo_log == NULL) {
1948 1876                  kmem_stkinfo_log = (kmem_stkinfo_t *)
1949 1877                      kmem_zalloc(KMEM_STKINFO_LOG_SIZE *
1950 1878                      (sizeof (kmem_stkinfo_t)), KM_NOSLEEP);
1951 1879                  if (kmem_stkinfo_log == NULL) {
1952 1880                          mutex_exit(&kmem_stkinfo_lock);
1953 1881                          return;
1954 1882                  }
1955 1883          }
1956 1884          mutex_exit(&kmem_stkinfo_lock);
1957 1885  
1958 1886          /*
1959 1887           * Stack grows up or down, see thread_create(),
1960 1888           * compute stack memory area start and end (start < end).
1961 1889           */
1962 1890          if (t->t_stk > t->t_stkbase) {
1963 1891                  /* stack grows down */
1964 1892                  start = t->t_stkbase;
1965 1893                  end = t->t_stk;
1966 1894          } else {
1967 1895                  /* stack grows up */
1968 1896                  start = t->t_stk;
1969 1897                  end = t->t_stkbase;
1970 1898          }
1971 1899  
1972 1900          /* stack size as found in kthread_t */
1973 1901          stksz = end - start;
1974 1902  
1975 1903          /*
1976 1904           * Stackinfo pattern size is 8 bytes. Ensure proper 8 bytes
1977 1905           * alignement for start and end in stack area boundaries
1978 1906           * (protection against corrupt t_stkbase/t_stk data).
1979 1907           */
1980 1908          if ((((uintptr_t)start) & 0x7) != 0) {
1981 1909                  start = (caddr_t)((((uintptr_t)start) & (~0x7)) + 8);
1982 1910          }
1983 1911          end = (caddr_t)(((uintptr_t)end) & (~0x7));
1984 1912  
1985 1913          if ((end <= start) || (end - start) > (1024 * 1024)) {
1986 1914                  /* negative or stack size > 1 meg, assume bogus */
1987 1915                  return;
1988 1916          }
1989 1917  
1990 1918          /* search until no pattern in the stack */
1991 1919          if (t->t_stk > t->t_stkbase) {
1992 1920                  /* stack grows down */
1993 1921  #if defined(__i386) || defined(__amd64)
1994 1922                  /*
1995 1923                   * 6 longs are pushed on stack, see thread_load(). Skip
1996 1924                   * them, so if kthread has never run, percent is zero.
1997 1925                   * 8 bytes alignement is preserved for a 32 bit kernel,
1998 1926                   * 6 x 4 = 24, 24 is a multiple of 8.
1999 1927                   *
2000 1928                   */
2001 1929                  end -= (6 * sizeof (long));
2002 1930  #endif
2003 1931                  ptr = (uint64_t *)((void *)start);
2004 1932                  while (ptr < (uint64_t *)((void *)end)) {
2005 1933                          if (*ptr != KMEM_STKINFO_PATTERN) {
2006 1934                                  percent = stkinfo_percent(end,
2007 1935                                      start, (caddr_t)ptr);
2008 1936                                  break;
2009 1937                          }
2010 1938                          ptr++;
2011 1939                  }
2012 1940          } else {
2013 1941                  /* stack grows up */
2014 1942                  ptr = (uint64_t *)((void *)end);
2015 1943                  ptr--;
2016 1944                  while (ptr >= (uint64_t *)((void *)start)) {
2017 1945                          if (*ptr != KMEM_STKINFO_PATTERN) {
2018 1946                                  percent = stkinfo_percent(start,
2019 1947                                      end, (caddr_t)ptr);
2020 1948                                  break;
2021 1949                          }
2022 1950                          ptr--;
2023 1951                  }
2024 1952          }
2025 1953  
2026 1954          DTRACE_PROBE3(stack__usage, kthread_t *, t,
2027 1955              size_t, stksz, size_t, percent);
2028 1956  
2029 1957          if (percent == 0) {
2030 1958                  return;
2031 1959          }
2032 1960  
2033 1961          mutex_enter(&kmem_stkinfo_lock);
2034 1962          if (full == KMEM_STKINFO_LOG_SIZE && percent < smallest_percent) {
2035 1963                  /*
2036 1964                   * The log is full and already contains the highest values
2037 1965                   */
2038 1966                  mutex_exit(&kmem_stkinfo_lock);
2039 1967                  return;
2040 1968          }
2041 1969  
2042 1970          /* keep a log of the highest used stack */
2043 1971          for (i = 0; i < KMEM_STKINFO_LOG_SIZE; i++) {
2044 1972                  if (kmem_stkinfo_log[i].percent == 0) {
2045 1973                          index = i;
2046 1974                          full++;
2047 1975                          break;
2048 1976                  }
2049 1977                  if (smallest == 0) {
2050 1978                          smallest = kmem_stkinfo_log[i].percent;
2051 1979                          index = i;
2052 1980                          continue;
2053 1981                  }
2054 1982                  if (kmem_stkinfo_log[i].percent < smallest) {
2055 1983                          smallest = kmem_stkinfo_log[i].percent;
2056 1984                          index = i;
2057 1985                  }
2058 1986          }
2059 1987  
2060 1988          if (percent >= kmem_stkinfo_log[index].percent) {
2061 1989                  kmem_stkinfo_log[index].kthread = (caddr_t)t;
2062 1990                  kmem_stkinfo_log[index].t_startpc = (caddr_t)t->t_startpc;
2063 1991                  kmem_stkinfo_log[index].start = start;
2064 1992                  kmem_stkinfo_log[index].stksz = stksz;
2065 1993                  kmem_stkinfo_log[index].percent = percent;
2066 1994                  kmem_stkinfo_log[index].t_tid = t->t_tid;
2067 1995                  kmem_stkinfo_log[index].cmd[0] = '\0';
2068 1996                  if (t->t_tid != 0) {
2069 1997                          stksz = strlen((t->t_procp)->p_user.u_comm);
2070 1998                          if (stksz >= KMEM_STKINFO_STR_SIZE) {
2071 1999                                  stksz = KMEM_STKINFO_STR_SIZE - 1;
2072 2000                                  kmem_stkinfo_log[index].cmd[stksz] = '\0';
2073 2001                          } else {
2074 2002                                  stksz += 1;
2075 2003                          }
2076 2004                          (void) memcpy(kmem_stkinfo_log[index].cmd,
2077 2005                              (t->t_procp)->p_user.u_comm, stksz);
2078 2006                  }
2079 2007                  if (percent < smallest_percent) {
2080 2008                          smallest_percent = percent;
2081 2009                  }
2082 2010          }
2083 2011          mutex_exit(&kmem_stkinfo_lock);
2084 2012  }
2085 2013  
/*
 * Tunable kmem_stackinfo is set: compute the stack utilization percentage.
 *
 *      t_stk           address the stack starts from
 *      t_stkbase       address the stack grows towards
 *      sp              deepest stack address found in use
 *
 * The direction of growth is taken from the order of t_stk and t_stkbase.
 * Returns 0 when sp lies before t_stk, 100 when sp lies beyond t_stkbase,
 * and otherwise the used fraction rounded up and capped at 100.
 */
static size_t
stkinfo_percent(caddr_t t_stk, caddr_t t_stkbase, caddr_t sp)
{
        size_t used;    /* bytes between t_stk and sp, inclusive */
        size_t total;   /* bytes between t_stk and t_stkbase, inclusive */
        size_t pct;

        if (t_stk > t_stkbase) {
                /* stack grows down */
                if (sp > t_stk) {
                        return (0);
                }
                if (sp < t_stkbase) {
                        return (100);
                }
                used = t_stk - sp + 1;
                total = t_stk - t_stkbase + 1;
        } else {
                /* stack grows up */
                if (sp < t_stk) {
                        return (0);
                }
                if (sp > t_stkbase) {
                        return (100);
                }
                used = sp - t_stk + 1;
                total = t_stkbase - t_stk + 1;
        }

        pct = ((100 * used) / total) + 1;
        return (pct > 100 ? 100 : pct);
}

↓ open down ↓

706 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX