1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012 by Delphix. All rights reserved. 24 * Copyright 2018 Joyent, Inc. 25 */ 26 27 /* 28 * Architecture-independent CPU control functions. 29 */ 30 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/var.h> 34 #include <sys/thread.h> 35 #include <sys/cpuvar.h> 36 #include <sys/cpu_event.h> 37 #include <sys/kstat.h> 38 #include <sys/uadmin.h> 39 #include <sys/systm.h> 40 #include <sys/errno.h> 41 #include <sys/cmn_err.h> 42 #include <sys/procset.h> 43 #include <sys/processor.h> 44 #include <sys/debug.h> 45 #include <sys/cpupart.h> 46 #include <sys/lgrp.h> 47 #include <sys/pset.h> 48 #include <sys/pghw.h> 49 #include <sys/kmem.h> 50 #include <sys/kmem_impl.h> /* to set per-cpu kmem_cache offset */ 51 #include <sys/atomic.h> 52 #include <sys/callb.h> 53 #include <sys/vtrace.h> 54 #include <sys/cyclic.h> 55 #include <sys/bitmap.h> 56 #include <sys/nvpair.h> 57 #include <sys/pool_pset.h> 58 #include <sys/msacct.h> 59 #include <sys/time.h> 60 #include <sys/archsystm.h> 61 #include <sys/sdt.h> 62 #if defined(__x86) || defined(__amd64) 63 #include <sys/x86_archext.h> 64 #endif 65 #include <sys/callo.h> 66 67 extern int mp_cpu_start(cpu_t *); 68 extern int mp_cpu_stop(cpu_t *); 69 extern int mp_cpu_poweron(cpu_t *); 70 extern int mp_cpu_poweroff(cpu_t *); 71 extern int mp_cpu_configure(int); 72 extern int mp_cpu_unconfigure(int); 73 extern void mp_cpu_faulted_enter(cpu_t *); 74 extern void mp_cpu_faulted_exit(cpu_t *); 75 76 extern int cmp_cpu_to_chip(processorid_t cpuid); 77 #ifdef __sparcv9 78 extern char *cpu_fru_fmri(cpu_t *cp); 79 #endif 80 81 static void cpu_add_active_internal(cpu_t *cp); 82 static void cpu_remove_active(cpu_t *cp); 83 static void cpu_info_kstat_create(cpu_t *cp); 84 static void cpu_info_kstat_destroy(cpu_t *cp); 85 static void cpu_stats_kstat_create(cpu_t *cp); 86 static void cpu_stats_kstat_destroy(cpu_t *cp); 87 88 static int cpu_sys_stats_ks_update(kstat_t *ksp, int rw); 89 static int cpu_vm_stats_ks_update(kstat_t *ksp, int rw); 90 static int cpu_stat_ks_update(kstat_t *ksp, int rw); 91 static int cpu_state_change_hooks(int, cpu_setup_t, cpu_setup_t); 92 93 /* 94 * cpu_lock protects ncpus, ncpus_online, cpu_flag, cpu_list, cpu_active, 95 * max_cpu_seqid_ever, and dispatch queue reallocations. The lock ordering with 96 * respect to related locks is: 97 * 98 * cpu_lock --> thread_free_lock ---> p_lock ---> thread_lock() 99 * 100 * Warning: Certain sections of code do not use the cpu_lock when 101 * traversing the cpu_list (e.g. mutex_vector_enter(), clock()). 
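 *
 * For reference, a walker that follows the rules described next might, for
 * example, briefly disable kernel preemption around the walk (illustrative
 * sketch only, not a drop-in recipe):
 *
 *	kpreempt_disable();
 *	cp = cpu_list;
 *	do {
 *		... examine cp; do not keep the pointer afterwards ...
 *	} while ((cp = cp->cpu_next) != cpu_list);
 *	kpreempt_enable();
 *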
 * Since all cpus are paused during modifications to this list, a solution
 * to protect the list is to either disable kernel preemption while
 * walking the list, *or* recheck the cpu_next pointer at each
 * iteration in the loop.  Note that in no case can any cached
 * copies of the cpu pointers be kept as they may become invalid.
 */
kmutex_t	cpu_lock;
cpu_t		*cpu_list;		/* list of all CPUs */
cpu_t		*clock_cpu_list;	/* used by clock to walk CPUs */
cpu_t		*cpu_active;		/* list of active CPUs */
static cpuset_t	cpu_available;		/* set of available CPUs */
cpuset_t	cpu_seqid_inuse;	/* which cpu_seqids are in use */

cpu_t		**cpu_seq;		/* ptrs to CPUs, indexed by seq_id */

/*
 * max_ncpus keeps the max cpus the system can have.  Initially
 * it's NCPU, but since most archs scan the devtree for cpus
 * fairly early on during boot, the real max can be known before
 * ncpus is set (useful for early NCPU based allocations).
 */
int max_ncpus = NCPU;
/*
 * Platforms that set max_ncpus to the maximum number of cpus that can be
 * dynamically added will set boot_max_ncpus to the number of cpus found
 * at device tree scan time during boot.
 */
int boot_max_ncpus = -1;
int boot_ncpus = -1;
/*
 * Maximum possible CPU id.  This can never be >= NCPU since NCPU is
 * used to size arrays that are indexed by CPU id.
 */
processorid_t max_cpuid = NCPU - 1;

/*
 * Maximum cpu_seqid ever given out.  This number can only grow and never
 * shrink.  It can be used to optimize NCPU loops to avoid going through
 * CPUs which were never on-line.
 */
processorid_t max_cpu_seqid_ever = 0;

int ncpus = 1;
int ncpus_online = 1;

/*
 * CPU that we're trying to offline.  Protected by cpu_lock.
 */
cpu_t *cpu_inmotion;

/*
 * Can be raised to suppress further weakbindings, which are instead
 * satisfied by disabling preemption.  Must be raised/lowered under cpu_lock,
 * while individual thread weakbinding synchronization is done under thread
 * lock.
 */
int weakbindingbarrier;

/*
 * Variables used in pause_cpus().
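 *
 * Roughly (see cpu_pause(), cpu_pause_start(), pause_cpus() and
 * cpu_pause_free() below), each entry of safe_list[] tracks one CPU's
 * progress through the pause handshake:
 *
 *	PAUSE_IDLE	nothing pending for this CPU
 *	PAUSE_READY	pause thread is running on the CPU
 *	PAUSE_WAIT	CPU is held with interrupts blocked (or is not
 *			participating in this pause operation)
 *	PAUSE_DIE	cpu_pause_free() has asked the pause thread to exit
 *	PAUSE_DEAD	the pause thread has exited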
162 */ 163 static volatile char safe_list[NCPU]; 164 165 static struct _cpu_pause_info { 166 int cp_spl; /* spl saved in pause_cpus() */ 167 volatile int cp_go; /* Go signal sent after all ready */ 168 int cp_count; /* # of CPUs to pause */ 169 ksema_t cp_sem; /* synch pause_cpus & cpu_pause */ 170 kthread_id_t cp_paused; 171 void *(*cp_func)(void *); 172 } cpu_pause_info; 173 174 static kmutex_t pause_free_mutex; 175 static kcondvar_t pause_free_cv; 176 177 178 static struct cpu_sys_stats_ks_data { 179 kstat_named_t cpu_ticks_idle; 180 kstat_named_t cpu_ticks_user; 181 kstat_named_t cpu_ticks_kernel; 182 kstat_named_t cpu_ticks_wait; 183 kstat_named_t cpu_nsec_idle; 184 kstat_named_t cpu_nsec_user; 185 kstat_named_t cpu_nsec_kernel; 186 kstat_named_t cpu_nsec_dtrace; 187 kstat_named_t cpu_nsec_intr; 188 kstat_named_t cpu_load_intr; 189 kstat_named_t wait_ticks_io; 190 kstat_named_t dtrace_probes; 191 kstat_named_t bread; 192 kstat_named_t bwrite; 193 kstat_named_t lread; 194 kstat_named_t lwrite; 195 kstat_named_t phread; 196 kstat_named_t phwrite; 197 kstat_named_t pswitch; 198 kstat_named_t trap; 199 kstat_named_t intr; 200 kstat_named_t syscall; 201 kstat_named_t sysread; 202 kstat_named_t syswrite; 203 kstat_named_t sysfork; 204 kstat_named_t sysvfork; 205 kstat_named_t sysexec; 206 kstat_named_t readch; 207 kstat_named_t writech; 208 kstat_named_t rcvint; 209 kstat_named_t xmtint; 210 kstat_named_t mdmint; 211 kstat_named_t rawch; 212 kstat_named_t canch; 213 kstat_named_t outch; 214 kstat_named_t msg; 215 kstat_named_t sema; 216 kstat_named_t namei; 217 kstat_named_t ufsiget; 218 kstat_named_t ufsdirblk; 219 kstat_named_t ufsipage; 220 kstat_named_t ufsinopage; 221 kstat_named_t procovf; 222 kstat_named_t intrthread; 223 kstat_named_t intrblk; 224 kstat_named_t intrunpin; 225 kstat_named_t idlethread; 226 kstat_named_t inv_swtch; 227 kstat_named_t nthreads; 228 kstat_named_t cpumigrate; 229 kstat_named_t xcalls; 230 kstat_named_t mutex_adenters; 231 kstat_named_t rw_rdfails; 232 kstat_named_t rw_wrfails; 233 kstat_named_t modload; 234 kstat_named_t modunload; 235 kstat_named_t bawrite; 236 kstat_named_t iowait; 237 } cpu_sys_stats_ks_data_template = { 238 { "cpu_ticks_idle", KSTAT_DATA_UINT64 }, 239 { "cpu_ticks_user", KSTAT_DATA_UINT64 }, 240 { "cpu_ticks_kernel", KSTAT_DATA_UINT64 }, 241 { "cpu_ticks_wait", KSTAT_DATA_UINT64 }, 242 { "cpu_nsec_idle", KSTAT_DATA_UINT64 }, 243 { "cpu_nsec_user", KSTAT_DATA_UINT64 }, 244 { "cpu_nsec_kernel", KSTAT_DATA_UINT64 }, 245 { "cpu_nsec_dtrace", KSTAT_DATA_UINT64 }, 246 { "cpu_nsec_intr", KSTAT_DATA_UINT64 }, 247 { "cpu_load_intr", KSTAT_DATA_UINT64 }, 248 { "wait_ticks_io", KSTAT_DATA_UINT64 }, 249 { "dtrace_probes", KSTAT_DATA_UINT64 }, 250 { "bread", KSTAT_DATA_UINT64 }, 251 { "bwrite", KSTAT_DATA_UINT64 }, 252 { "lread", KSTAT_DATA_UINT64 }, 253 { "lwrite", KSTAT_DATA_UINT64 }, 254 { "phread", KSTAT_DATA_UINT64 }, 255 { "phwrite", KSTAT_DATA_UINT64 }, 256 { "pswitch", KSTAT_DATA_UINT64 }, 257 { "trap", KSTAT_DATA_UINT64 }, 258 { "intr", KSTAT_DATA_UINT64 }, 259 { "syscall", KSTAT_DATA_UINT64 }, 260 { "sysread", KSTAT_DATA_UINT64 }, 261 { "syswrite", KSTAT_DATA_UINT64 }, 262 { "sysfork", KSTAT_DATA_UINT64 }, 263 { "sysvfork", KSTAT_DATA_UINT64 }, 264 { "sysexec", KSTAT_DATA_UINT64 }, 265 { "readch", KSTAT_DATA_UINT64 }, 266 { "writech", KSTAT_DATA_UINT64 }, 267 { "rcvint", KSTAT_DATA_UINT64 }, 268 { "xmtint", KSTAT_DATA_UINT64 }, 269 { "mdmint", KSTAT_DATA_UINT64 }, 270 { "rawch", KSTAT_DATA_UINT64 }, 271 { "canch", KSTAT_DATA_UINT64 }, 
272 { "outch", KSTAT_DATA_UINT64 }, 273 { "msg", KSTAT_DATA_UINT64 }, 274 { "sema", KSTAT_DATA_UINT64 }, 275 { "namei", KSTAT_DATA_UINT64 }, 276 { "ufsiget", KSTAT_DATA_UINT64 }, 277 { "ufsdirblk", KSTAT_DATA_UINT64 }, 278 { "ufsipage", KSTAT_DATA_UINT64 }, 279 { "ufsinopage", KSTAT_DATA_UINT64 }, 280 { "procovf", KSTAT_DATA_UINT64 }, 281 { "intrthread", KSTAT_DATA_UINT64 }, 282 { "intrblk", KSTAT_DATA_UINT64 }, 283 { "intrunpin", KSTAT_DATA_UINT64 }, 284 { "idlethread", KSTAT_DATA_UINT64 }, 285 { "inv_swtch", KSTAT_DATA_UINT64 }, 286 { "nthreads", KSTAT_DATA_UINT64 }, 287 { "cpumigrate", KSTAT_DATA_UINT64 }, 288 { "xcalls", KSTAT_DATA_UINT64 }, 289 { "mutex_adenters", KSTAT_DATA_UINT64 }, 290 { "rw_rdfails", KSTAT_DATA_UINT64 }, 291 { "rw_wrfails", KSTAT_DATA_UINT64 }, 292 { "modload", KSTAT_DATA_UINT64 }, 293 { "modunload", KSTAT_DATA_UINT64 }, 294 { "bawrite", KSTAT_DATA_UINT64 }, 295 { "iowait", KSTAT_DATA_UINT64 }, 296 }; 297 298 static struct cpu_vm_stats_ks_data { 299 kstat_named_t pgrec; 300 kstat_named_t pgfrec; 301 kstat_named_t pgin; 302 kstat_named_t pgpgin; 303 kstat_named_t pgout; 304 kstat_named_t pgpgout; 305 kstat_named_t swapin; 306 kstat_named_t pgswapin; 307 kstat_named_t swapout; 308 kstat_named_t pgswapout; 309 kstat_named_t zfod; 310 kstat_named_t dfree; 311 kstat_named_t scan; 312 kstat_named_t rev; 313 kstat_named_t hat_fault; 314 kstat_named_t as_fault; 315 kstat_named_t maj_fault; 316 kstat_named_t cow_fault; 317 kstat_named_t prot_fault; 318 kstat_named_t softlock; 319 kstat_named_t kernel_asflt; 320 kstat_named_t pgrrun; 321 kstat_named_t execpgin; 322 kstat_named_t execpgout; 323 kstat_named_t execfree; 324 kstat_named_t anonpgin; 325 kstat_named_t anonpgout; 326 kstat_named_t anonfree; 327 kstat_named_t fspgin; 328 kstat_named_t fspgout; 329 kstat_named_t fsfree; 330 } cpu_vm_stats_ks_data_template = { 331 { "pgrec", KSTAT_DATA_UINT64 }, 332 { "pgfrec", KSTAT_DATA_UINT64 }, 333 { "pgin", KSTAT_DATA_UINT64 }, 334 { "pgpgin", KSTAT_DATA_UINT64 }, 335 { "pgout", KSTAT_DATA_UINT64 }, 336 { "pgpgout", KSTAT_DATA_UINT64 }, 337 { "swapin", KSTAT_DATA_UINT64 }, 338 { "pgswapin", KSTAT_DATA_UINT64 }, 339 { "swapout", KSTAT_DATA_UINT64 }, 340 { "pgswapout", KSTAT_DATA_UINT64 }, 341 { "zfod", KSTAT_DATA_UINT64 }, 342 { "dfree", KSTAT_DATA_UINT64 }, 343 { "scan", KSTAT_DATA_UINT64 }, 344 { "rev", KSTAT_DATA_UINT64 }, 345 { "hat_fault", KSTAT_DATA_UINT64 }, 346 { "as_fault", KSTAT_DATA_UINT64 }, 347 { "maj_fault", KSTAT_DATA_UINT64 }, 348 { "cow_fault", KSTAT_DATA_UINT64 }, 349 { "prot_fault", KSTAT_DATA_UINT64 }, 350 { "softlock", KSTAT_DATA_UINT64 }, 351 { "kernel_asflt", KSTAT_DATA_UINT64 }, 352 { "pgrrun", KSTAT_DATA_UINT64 }, 353 { "execpgin", KSTAT_DATA_UINT64 }, 354 { "execpgout", KSTAT_DATA_UINT64 }, 355 { "execfree", KSTAT_DATA_UINT64 }, 356 { "anonpgin", KSTAT_DATA_UINT64 }, 357 { "anonpgout", KSTAT_DATA_UINT64 }, 358 { "anonfree", KSTAT_DATA_UINT64 }, 359 { "fspgin", KSTAT_DATA_UINT64 }, 360 { "fspgout", KSTAT_DATA_UINT64 }, 361 { "fsfree", KSTAT_DATA_UINT64 }, 362 }; 363 364 /* 365 * Force the specified thread to migrate to the appropriate processor. 366 * Called with thread lock held, returns with it dropped. 
367 */ 368 static void 369 force_thread_migrate(kthread_id_t tp) 370 { 371 ASSERT(THREAD_LOCK_HELD(tp)); 372 if (tp == curthread) { 373 THREAD_TRANSITION(tp); 374 CL_SETRUN(tp); 375 thread_unlock_nopreempt(tp); 376 swtch(); 377 } else { 378 if (tp->t_state == TS_ONPROC) { 379 cpu_surrender(tp); 380 } else if (tp->t_state == TS_RUN) { 381 (void) dispdeq(tp); 382 setbackdq(tp); 383 } 384 thread_unlock(tp); 385 } 386 } 387 388 /* 389 * Set affinity for a specified CPU. 390 * 391 * Specifying a cpu_id of CPU_CURRENT, allowed _only_ when setting affinity for 392 * curthread, will set affinity to the CPU on which the thread is currently 393 * running. For other cpu_id values, the caller must ensure that the 394 * referenced CPU remains valid, which can be done by holding cpu_lock across 395 * this call. 396 * 397 * CPU affinity is guaranteed after return of thread_affinity_set(). If a 398 * caller setting affinity to CPU_CURRENT requires that its thread not migrate 399 * CPUs prior to a successful return, it should take extra precautions (such as 400 * their own call to kpreempt_disable) to ensure that safety. 401 * 402 * CPU_BEST can be used to pick a "best" CPU to migrate to, including 403 * potentially the current CPU. 404 * 405 * A CPU affinity reference count is maintained by thread_affinity_set and 406 * thread_affinity_clear (incrementing and decrementing it, respectively), 407 * maintaining CPU affinity while the count is non-zero, and allowing regions 408 * of code which require affinity to be nested. 409 */ 410 void 411 thread_affinity_set(kthread_id_t t, int cpu_id) 412 { 413 cpu_t *cp; 414 415 ASSERT(!(t == curthread && t->t_weakbound_cpu != NULL)); 416 417 if (cpu_id == CPU_CURRENT) { 418 VERIFY3P(t, ==, curthread); 419 kpreempt_disable(); 420 cp = CPU; 421 } else if (cpu_id == CPU_BEST) { 422 VERIFY3P(t, ==, curthread); 423 kpreempt_disable(); 424 cp = disp_choose_best_cpu(); 425 } else { 426 /* 427 * We should be asserting that cpu_lock is held here, but 428 * the NCA code doesn't acquire it. The following assert 429 * should be uncommented when the NCA code is fixed. 430 * 431 * ASSERT(MUTEX_HELD(&cpu_lock)); 432 */ 433 VERIFY((cpu_id >= 0) && (cpu_id < NCPU)); 434 cp = cpu[cpu_id]; 435 436 /* user must provide a good cpu_id */ 437 VERIFY(cp != NULL); 438 } 439 440 /* 441 * If there is already a hard affinity requested, and this affinity 442 * conflicts with that, panic. 443 */ 444 thread_lock(t); 445 if (t->t_affinitycnt > 0 && t->t_bound_cpu != cp) { 446 panic("affinity_set: setting %p but already bound to %p", 447 (void *)cp, (void *)t->t_bound_cpu); 448 } 449 t->t_affinitycnt++; 450 t->t_bound_cpu = cp; 451 452 /* 453 * Make sure we're running on the right CPU. 454 */ 455 if (cp != t->t_cpu || t != curthread) { 456 ASSERT(cpu_id != CPU_CURRENT); 457 force_thread_migrate(t); /* drops thread lock */ 458 } else { 459 thread_unlock(t); 460 } 461 462 if (cpu_id == CPU_CURRENT || cpu_id == CPU_BEST) 463 kpreempt_enable(); 464 } 465 466 /* 467 * Wrapper for backward compatibility. 468 */ 469 void 470 affinity_set(int cpu_id) 471 { 472 thread_affinity_set(curthread, cpu_id); 473 } 474 475 /* 476 * Decrement the affinity reservation count and if it becomes zero, 477 * clear the CPU affinity for the current thread, or set it to the user's 478 * software binding request. 
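 *
 * For reference, an illustrative caller pairs the two calls around a short
 * stretch of code that must stay on one CPU (sketch only):
 *
 *	thread_affinity_set(curthread, CPU_CURRENT);
 *	... code that must not migrate to another CPU ...
 *	thread_affinity_clear(curthread);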
479 */ 480 void 481 thread_affinity_clear(kthread_id_t t) 482 { 483 register processorid_t binding; 484 485 thread_lock(t); 486 if (--t->t_affinitycnt == 0) { 487 if ((binding = t->t_bind_cpu) == PBIND_NONE) { 488 /* 489 * Adjust disp_max_unbound_pri if necessary. 490 */ 491 disp_adjust_unbound_pri(t); 492 t->t_bound_cpu = NULL; 493 if (t->t_cpu->cpu_part != t->t_cpupart) { 494 force_thread_migrate(t); 495 return; 496 } 497 } else { 498 t->t_bound_cpu = cpu[binding]; 499 /* 500 * Make sure the thread is running on the bound CPU. 501 */ 502 if (t->t_cpu != t->t_bound_cpu) { 503 force_thread_migrate(t); 504 return; /* already dropped lock */ 505 } 506 } 507 } 508 thread_unlock(t); 509 } 510 511 /* 512 * Wrapper for backward compatibility. 513 */ 514 void 515 affinity_clear(void) 516 { 517 thread_affinity_clear(curthread); 518 } 519 520 /* 521 * Weak cpu affinity. Bind to the "current" cpu for short periods 522 * of time during which the thread must not block (but may be preempted). 523 * Use this instead of kpreempt_disable() when it is only "no migration" 524 * rather than "no preemption" semantics that are required - disabling 525 * preemption holds higher priority threads off of cpu and if the 526 * operation that is protected is more than momentary this is not good 527 * for realtime etc. 528 * 529 * Weakly bound threads will not prevent a cpu from being offlined - 530 * we'll only run them on the cpu to which they are weakly bound but 531 * (because they do not block) we'll always be able to move them on to 532 * another cpu at offline time if we give them just a short moment to 533 * run during which they will unbind. To give a cpu a chance of offlining, 534 * however, we require a barrier to weak bindings that may be raised for a 535 * given cpu (offline/move code may set this and then wait a short time for 536 * existing weak bindings to drop); the cpu_inmotion pointer is that barrier. 537 * 538 * There are few restrictions on the calling context of thread_nomigrate. 539 * The caller must not hold the thread lock. Calls may be nested. 540 * 541 * After weakbinding a thread must not perform actions that may block. 542 * In particular it must not call thread_affinity_set; calling that when 543 * already weakbound is nonsensical anyway. 544 * 545 * If curthread is prevented from migrating for other reasons 546 * (kernel preemption disabled; high pil; strongly bound; interrupt thread) 547 * then the weak binding will succeed even if this cpu is the target of an 548 * offline/move request. 549 */ 550 void 551 thread_nomigrate(void) 552 { 553 cpu_t *cp; 554 kthread_id_t t = curthread; 555 556 again: 557 kpreempt_disable(); 558 cp = CPU; 559 560 /* 561 * A highlevel interrupt must not modify t_nomigrate or 562 * t_weakbound_cpu of the thread it has interrupted. A lowlevel 563 * interrupt thread cannot migrate and we can avoid the 564 * thread_lock call below by short-circuiting here. In either 565 * case we can just return since no migration is possible and 566 * the condition will persist (ie, when we test for these again 567 * in thread_allowmigrate they can't have changed). Migration 568 * is also impossible if we're at or above DISP_LEVEL pil. 569 */ 570 if (CPU_ON_INTR(cp) || t->t_flag & T_INTR_THREAD || 571 getpil() >= DISP_LEVEL) { 572 kpreempt_enable(); 573 return; 574 } 575 576 /* 577 * We must be consistent with existing weak bindings. 
Since we 578 * may be interrupted between the increment of t_nomigrate and 579 * the store to t_weakbound_cpu below we cannot assume that 580 * t_weakbound_cpu will be set if t_nomigrate is. Note that we 581 * cannot assert t_weakbound_cpu == t_bind_cpu since that is not 582 * always the case. 583 */ 584 if (t->t_nomigrate && t->t_weakbound_cpu && t->t_weakbound_cpu != cp) { 585 if (!panicstr) 586 panic("thread_nomigrate: binding to %p but already " 587 "bound to %p", (void *)cp, 588 (void *)t->t_weakbound_cpu); 589 } 590 591 /* 592 * At this point we have preemption disabled and we don't yet hold 593 * the thread lock. So it's possible that somebody else could 594 * set t_bind_cpu here and not be able to force us across to the 595 * new cpu (since we have preemption disabled). 596 */ 597 thread_lock(curthread); 598 599 /* 600 * If further weak bindings are being (temporarily) suppressed then 601 * we'll settle for disabling kernel preemption (which assures 602 * no migration provided the thread does not block which it is 603 * not allowed to if using thread_nomigrate). We must remember 604 * this disposition so we can take appropriate action in 605 * thread_allowmigrate. If this is a nested call and the 606 * thread is already weakbound then fall through as normal. 607 * We remember the decision to settle for kpreempt_disable through 608 * negative nesting counting in t_nomigrate. Once a thread has had one 609 * weakbinding request satisfied in this way any further (nested) 610 * requests will continue to be satisfied in the same way, 611 * even if weak bindings have recommenced. 612 */ 613 if (t->t_nomigrate < 0 || weakbindingbarrier && t->t_nomigrate == 0) { 614 --t->t_nomigrate; 615 thread_unlock(curthread); 616 return; /* with kpreempt_disable still active */ 617 } 618 619 /* 620 * We hold thread_lock so t_bind_cpu cannot change. We could, 621 * however, be running on a different cpu to which we are t_bound_cpu 622 * to (as explained above). If we grant the weak binding request 623 * in that case then the dispatcher must favour our weak binding 624 * over our strong (in which case, just as when preemption is 625 * disabled, we can continue to run on a cpu other than the one to 626 * which we are strongbound; the difference in this case is that 627 * this thread can be preempted and so can appear on the dispatch 628 * queues of a cpu other than the one it is strongbound to). 629 * 630 * If the cpu we are running on does not appear to be a current 631 * offline target (we check cpu_inmotion to determine this - since 632 * we don't hold cpu_lock we may not see a recent store to that, 633 * so it's possible that we at times can grant a weak binding to a 634 * cpu that is an offline target, but that one request will not 635 * prevent the offline from succeeding) then we will always grant 636 * the weak binding request. This includes the case above where 637 * we grant a weakbinding not commensurate with our strong binding. 638 * 639 * If our cpu does appear to be an offline target then we're inclined 640 * not to grant the weakbinding request just yet - we'd prefer to 641 * migrate to another cpu and grant the request there. The 642 * exceptions are those cases where going through preemption code 643 * will not result in us changing cpu: 644 * 645 * . interrupts have already bypassed this case (see above) 646 * . we are already weakbound to this cpu (dispatcher code will 647 * always return us to the weakbound cpu) 648 * . preemption was disabled even before we disabled it above 649 * . 
	 *	we are strongbound to this cpu (if we're strongbound to
	 *	another and not yet running there the trip through the
	 *	dispatcher will move us to the strongbound cpu and we
	 *	will grant the weak binding there)
	 */
	if (cp != cpu_inmotion || t->t_nomigrate > 0 || t->t_preempt > 1 ||
	    t->t_bound_cpu == cp) {
		/*
		 * Don't be tempted to store to t_weakbound_cpu only on
		 * the first nested bind request - if we're interrupted
		 * after the increment of t_nomigrate and before the
		 * store to t_weakbound_cpu and the interrupt calls
		 * thread_nomigrate then the assertion in thread_allowmigrate
		 * would fail.
		 */
		t->t_nomigrate++;
		t->t_weakbound_cpu = cp;
		membar_producer();
		thread_unlock(curthread);
		/*
		 * Now that we have dropped the thread_lock another thread
		 * can set our t_weakbound_cpu, and will try to migrate us
		 * to the strongbound cpu (which will not be prevented by
		 * preemption being disabled since we're about to enable
		 * preemption).  We have granted the weakbinding to the current
		 * cpu, so again we are in the position that it is possible
		 * that our weak and strong bindings differ.  Again this
		 * is catered for by dispatcher code which will favour our
		 * weak binding.
		 */
		kpreempt_enable();
	} else {
		/*
		 * Move to another cpu before granting the request by
		 * forcing this thread through preemption code.  When we
		 * get to set{front,back}dq called from CL_PREEMPT()
		 * cpu_choose() will be used to select a cpu to queue
		 * us on - that will see cpu_inmotion and take
		 * steps to avoid returning us to this cpu.
		 */
		cp->cpu_kprunrun = 1;
		thread_unlock(curthread);
		kpreempt_enable();	/* will call preempt() */
		goto again;
	}
}

void
thread_allowmigrate(void)
{
	kthread_id_t t = curthread;

	ASSERT(t->t_weakbound_cpu == CPU ||
	    (t->t_nomigrate < 0 && t->t_preempt > 0) ||
	    CPU_ON_INTR(CPU) || t->t_flag & T_INTR_THREAD ||
	    getpil() >= DISP_LEVEL);

	if (CPU_ON_INTR(CPU) || (t->t_flag & T_INTR_THREAD) ||
	    getpil() >= DISP_LEVEL)
		return;

	if (t->t_nomigrate < 0) {
		/*
		 * This thread was granted "weak binding" in the
		 * stronger form of kernel preemption disabling.
		 * Undo a level of nesting for both t_nomigrate
		 * and t_preempt.
		 */
		++t->t_nomigrate;
		kpreempt_enable();
	} else if (--t->t_nomigrate == 0) {
		/*
		 * Time to drop the weak binding.  We need to cater
		 * for the case where we're weakbound to a different
		 * cpu than that to which we're strongbound (a very
		 * temporary arrangement that must only persist until
		 * weak binding drops).  We don't acquire thread_lock
		 * here so even as this code executes t_bound_cpu
		 * may be changing.  So we disable preemption and
		 * a) in the case that t_bound_cpu changes while we
		 *    have preemption disabled kprunrun will be set
		 *    asynchronously, and b) if before disabling
		 *    preemption we were already on a different cpu to
		 *    our t_bound_cpu then we set kprunrun ourselves
		 *    to force a trip through the dispatcher when
		 *    preemption is enabled.
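		 *
		 * For reference, a typical (hypothetical) caller brackets a
		 * short stretch of code that must not migrate, and must not
		 * block, with the pair:
		 *
		 *	thread_nomigrate();
		 *	... non-blocking, non-migrating work ...
		 *	thread_allowmigrate();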
735 */ 736 kpreempt_disable(); 737 if (t->t_bound_cpu && 738 t->t_weakbound_cpu != t->t_bound_cpu) 739 CPU->cpu_kprunrun = 1; 740 t->t_weakbound_cpu = NULL; 741 membar_producer(); 742 kpreempt_enable(); 743 } 744 } 745 746 /* 747 * weakbinding_stop can be used to temporarily cause weakbindings made 748 * with thread_nomigrate to be satisfied through the stronger action of 749 * kpreempt_disable. weakbinding_start recommences normal weakbinding. 750 */ 751 752 void 753 weakbinding_stop(void) 754 { 755 ASSERT(MUTEX_HELD(&cpu_lock)); 756 weakbindingbarrier = 1; 757 membar_producer(); /* make visible before subsequent thread_lock */ 758 } 759 760 void 761 weakbinding_start(void) 762 { 763 ASSERT(MUTEX_HELD(&cpu_lock)); 764 weakbindingbarrier = 0; 765 } 766 767 void 768 null_xcall(void) 769 { 770 } 771 772 /* 773 * This routine is called to place the CPUs in a safe place so that 774 * one of them can be taken off line or placed on line. What we are 775 * trying to do here is prevent a thread from traversing the list 776 * of active CPUs while we are changing it or from getting placed on 777 * the run queue of a CPU that has just gone off line. We do this by 778 * creating a thread with the highest possible prio for each CPU and 779 * having it call this routine. The advantage of this method is that 780 * we can eliminate all checks for CPU_ACTIVE in the disp routines. 781 * This makes disp faster at the expense of making p_online() slower 782 * which is a good trade off. 783 */ 784 static void 785 cpu_pause(int index) 786 { 787 int s; 788 struct _cpu_pause_info *cpi = &cpu_pause_info; 789 volatile char *safe = &safe_list[index]; 790 long lindex = index; 791 792 ASSERT((curthread->t_bound_cpu != NULL) || (*safe == PAUSE_DIE)); 793 794 while (*safe != PAUSE_DIE) { 795 *safe = PAUSE_READY; 796 membar_enter(); /* make sure stores are flushed */ 797 sema_v(&cpi->cp_sem); /* signal requesting thread */ 798 799 /* 800 * Wait here until all pause threads are running. That 801 * indicates that it's safe to do the spl. Until 802 * cpu_pause_info.cp_go is set, we don't want to spl 803 * because that might block clock interrupts needed 804 * to preempt threads on other CPUs. 805 */ 806 while (cpi->cp_go == 0) 807 ; 808 /* 809 * Even though we are at the highest disp prio, we need 810 * to block out all interrupts below LOCK_LEVEL so that 811 * an intr doesn't come in, wake up a thread, and call 812 * setbackdq/setfrontdq. 813 */ 814 s = splhigh(); 815 /* 816 * if cp_func has been set then call it using index as the 817 * argument, currently only used by cpr_suspend_cpus(). 818 * This function is used as the code to execute on the 819 * "paused" cpu's when a machine comes out of a sleep state 820 * and CPU's were powered off. (could also be used for 821 * hotplugging CPU's). 822 */ 823 if (cpi->cp_func != NULL) 824 (*cpi->cp_func)((void *)lindex); 825 826 mach_cpu_pause(safe); 827 828 splx(s); 829 /* 830 * Waiting is at an end. Switch out of cpu_pause 831 * loop and resume useful work. 832 */ 833 swtch(); 834 } 835 836 mutex_enter(&pause_free_mutex); 837 *safe = PAUSE_DEAD; 838 cv_broadcast(&pause_free_cv); 839 mutex_exit(&pause_free_mutex); 840 } 841 842 /* 843 * Allow the cpus to start running again. 
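 *
 * For reference, callers such as cpu_online() and cpu_add_unit() below
 * bracket the protected update with the pair, holding cpu_lock throughout
 * (sketch only):
 *
 *	mutex_enter(&cpu_lock);
 *	pause_cpus(NULL, NULL);
 *	... update structures the other CPUs must not see in flux ...
 *	start_cpus();
 *	mutex_exit(&cpu_lock);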
844 */ 845 void 846 start_cpus() 847 { 848 int i; 849 850 ASSERT(MUTEX_HELD(&cpu_lock)); 851 ASSERT(cpu_pause_info.cp_paused); 852 cpu_pause_info.cp_paused = NULL; 853 for (i = 0; i < NCPU; i++) 854 safe_list[i] = PAUSE_IDLE; 855 membar_enter(); /* make sure stores are flushed */ 856 affinity_clear(); 857 splx(cpu_pause_info.cp_spl); 858 kpreempt_enable(); 859 } 860 861 /* 862 * Allocate a pause thread for a CPU. 863 */ 864 static void 865 cpu_pause_alloc(cpu_t *cp) 866 { 867 kthread_id_t t; 868 long cpun = cp->cpu_id; 869 870 /* 871 * Note, v.v_nglobpris will not change value as long as I hold 872 * cpu_lock. 873 */ 874 t = thread_create(NULL, 0, cpu_pause, (void *)cpun, 875 0, &p0, TS_STOPPED, v.v_nglobpris - 1); 876 thread_lock(t); 877 t->t_bound_cpu = cp; 878 t->t_disp_queue = cp->cpu_disp; 879 t->t_affinitycnt = 1; 880 t->t_preempt = 1; 881 thread_unlock(t); 882 cp->cpu_pause_thread = t; 883 /* 884 * Registering a thread in the callback table is usually done 885 * in the initialization code of the thread. In this 886 * case, we do it right after thread creation because the 887 * thread itself may never run, and we need to register the 888 * fact that it is safe for cpr suspend. 889 */ 890 CALLB_CPR_INIT_SAFE(t, "cpu_pause"); 891 } 892 893 /* 894 * Free a pause thread for a CPU. 895 */ 896 static void 897 cpu_pause_free(cpu_t *cp) 898 { 899 kthread_id_t t; 900 int cpun = cp->cpu_id; 901 902 ASSERT(MUTEX_HELD(&cpu_lock)); 903 /* 904 * We have to get the thread and tell it to die. 905 */ 906 if ((t = cp->cpu_pause_thread) == NULL) { 907 ASSERT(safe_list[cpun] == PAUSE_IDLE); 908 return; 909 } 910 thread_lock(t); 911 t->t_cpu = CPU; /* disp gets upset if last cpu is quiesced. */ 912 t->t_bound_cpu = NULL; /* Must un-bind; cpu may not be running. */ 913 t->t_pri = v.v_nglobpris - 1; 914 ASSERT(safe_list[cpun] == PAUSE_IDLE); 915 safe_list[cpun] = PAUSE_DIE; 916 THREAD_TRANSITION(t); 917 setbackdq(t); 918 thread_unlock_nopreempt(t); 919 920 /* 921 * If we don't wait for the thread to actually die, it may try to 922 * run on the wrong cpu as part of an actual call to pause_cpus(). 923 */ 924 mutex_enter(&pause_free_mutex); 925 while (safe_list[cpun] != PAUSE_DEAD) { 926 cv_wait(&pause_free_cv, &pause_free_mutex); 927 } 928 mutex_exit(&pause_free_mutex); 929 safe_list[cpun] = PAUSE_IDLE; 930 931 cp->cpu_pause_thread = NULL; 932 } 933 934 /* 935 * Initialize basic structures for pausing CPUs. 936 */ 937 void 938 cpu_pause_init() 939 { 940 sema_init(&cpu_pause_info.cp_sem, 0, NULL, SEMA_DEFAULT, NULL); 941 /* 942 * Create initial CPU pause thread. 943 */ 944 cpu_pause_alloc(CPU); 945 } 946 947 /* 948 * Start the threads used to pause another CPU. 949 */ 950 static int 951 cpu_pause_start(processorid_t cpu_id) 952 { 953 int i; 954 int cpu_count = 0; 955 956 for (i = 0; i < NCPU; i++) { 957 cpu_t *cp; 958 kthread_id_t t; 959 960 cp = cpu[i]; 961 if (!CPU_IN_SET(cpu_available, i) || (i == cpu_id)) { 962 safe_list[i] = PAUSE_WAIT; 963 continue; 964 } 965 966 /* 967 * Skip CPU if it is quiesced or not yet started. 968 */ 969 if ((cp->cpu_flags & (CPU_QUIESCED | CPU_READY)) != CPU_READY) { 970 safe_list[i] = PAUSE_WAIT; 971 continue; 972 } 973 974 /* 975 * Start this CPU's pause thread. 976 */ 977 t = cp->cpu_pause_thread; 978 thread_lock(t); 979 /* 980 * Reset the priority, since nglobpris may have 981 * changed since the thread was created, if someone 982 * has loaded the RT (or some other) scheduling 983 * class. 
984 */ 985 t->t_pri = v.v_nglobpris - 1; 986 THREAD_TRANSITION(t); 987 setbackdq(t); 988 thread_unlock_nopreempt(t); 989 ++cpu_count; 990 } 991 return (cpu_count); 992 } 993 994 995 /* 996 * Pause all of the CPUs except the one we are on by creating a high 997 * priority thread bound to those CPUs. 998 * 999 * Note that one must be extremely careful regarding code 1000 * executed while CPUs are paused. Since a CPU may be paused 1001 * while a thread scheduling on that CPU is holding an adaptive 1002 * lock, code executed with CPUs paused must not acquire adaptive 1003 * (or low-level spin) locks. Also, such code must not block, 1004 * since the thread that is supposed to initiate the wakeup may 1005 * never run. 1006 * 1007 * With a few exceptions, the restrictions on code executed with CPUs 1008 * paused match those for code executed at high-level interrupt 1009 * context. 1010 */ 1011 void 1012 pause_cpus(cpu_t *off_cp, void *(*func)(void *)) 1013 { 1014 processorid_t cpu_id; 1015 int i; 1016 struct _cpu_pause_info *cpi = &cpu_pause_info; 1017 1018 ASSERT(MUTEX_HELD(&cpu_lock)); 1019 ASSERT(cpi->cp_paused == NULL); 1020 cpi->cp_count = 0; 1021 cpi->cp_go = 0; 1022 for (i = 0; i < NCPU; i++) 1023 safe_list[i] = PAUSE_IDLE; 1024 kpreempt_disable(); 1025 1026 cpi->cp_func = func; 1027 1028 /* 1029 * If running on the cpu that is going offline, get off it. 1030 * This is so that it won't be necessary to rechoose a CPU 1031 * when done. 1032 */ 1033 if (CPU == off_cp) 1034 cpu_id = off_cp->cpu_next_part->cpu_id; 1035 else 1036 cpu_id = CPU->cpu_id; 1037 affinity_set(cpu_id); 1038 1039 /* 1040 * Start the pause threads and record how many were started 1041 */ 1042 cpi->cp_count = cpu_pause_start(cpu_id); 1043 1044 /* 1045 * Now wait for all CPUs to be running the pause thread. 1046 */ 1047 while (cpi->cp_count > 0) { 1048 /* 1049 * Spin reading the count without grabbing the disp 1050 * lock to make sure we don't prevent the pause 1051 * threads from getting the lock. 1052 */ 1053 while (sema_held(&cpi->cp_sem)) 1054 ; 1055 if (sema_tryp(&cpi->cp_sem)) 1056 --cpi->cp_count; 1057 } 1058 cpi->cp_go = 1; /* all have reached cpu_pause */ 1059 1060 /* 1061 * Now wait for all CPUs to spl. (Transition from PAUSE_READY 1062 * to PAUSE_WAIT.) 1063 */ 1064 for (i = 0; i < NCPU; i++) { 1065 while (safe_list[i] != PAUSE_WAIT) 1066 ; 1067 } 1068 cpi->cp_spl = splhigh(); /* block dispatcher on this CPU */ 1069 cpi->cp_paused = curthread; 1070 } 1071 1072 /* 1073 * Check whether the current thread has CPUs paused 1074 */ 1075 int 1076 cpus_paused(void) 1077 { 1078 if (cpu_pause_info.cp_paused != NULL) { 1079 ASSERT(cpu_pause_info.cp_paused == curthread); 1080 return (1); 1081 } 1082 return (0); 1083 } 1084 1085 static cpu_t * 1086 cpu_get_all(processorid_t cpun) 1087 { 1088 ASSERT(MUTEX_HELD(&cpu_lock)); 1089 1090 if (cpun >= NCPU || cpun < 0 || !CPU_IN_SET(cpu_available, cpun)) 1091 return (NULL); 1092 return (cpu[cpun]); 1093 } 1094 1095 /* 1096 * Check whether cpun is a valid processor id and whether it should be 1097 * visible from the current zone. If it is, return a pointer to the 1098 * associated CPU structure. 
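 *
 * Illustrative use, with cpu_lock held as the ASSERT below requires
 * (sketch only; "id" is a hypothetical processorid_t):
 *
 *	mutex_enter(&cpu_lock);
 *	if ((cp = cpu_get(id)) != NULL) {
 *		... examine or operate on cp ...
 *	}
 *	mutex_exit(&cpu_lock);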
1099 */ 1100 cpu_t * 1101 cpu_get(processorid_t cpun) 1102 { 1103 cpu_t *c; 1104 1105 ASSERT(MUTEX_HELD(&cpu_lock)); 1106 c = cpu_get_all(cpun); 1107 if (c != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() && 1108 zone_pset_get(curproc->p_zone) != cpupart_query_cpu(c)) 1109 return (NULL); 1110 return (c); 1111 } 1112 1113 /* 1114 * The following functions should be used to check CPU states in the kernel. 1115 * They should be invoked with cpu_lock held. Kernel subsystems interested 1116 * in CPU states should *not* use cpu_get_state() and various P_ONLINE/etc 1117 * states. Those are for user-land (and system call) use only. 1118 */ 1119 1120 /* 1121 * Determine whether the CPU is online and handling interrupts. 1122 */ 1123 int 1124 cpu_is_online(cpu_t *cpu) 1125 { 1126 ASSERT(MUTEX_HELD(&cpu_lock)); 1127 return (cpu_flagged_online(cpu->cpu_flags)); 1128 } 1129 1130 /* 1131 * Determine whether the CPU is offline (this includes spare and faulted). 1132 */ 1133 int 1134 cpu_is_offline(cpu_t *cpu) 1135 { 1136 ASSERT(MUTEX_HELD(&cpu_lock)); 1137 return (cpu_flagged_offline(cpu->cpu_flags)); 1138 } 1139 1140 /* 1141 * Determine whether the CPU is powered off. 1142 */ 1143 int 1144 cpu_is_poweredoff(cpu_t *cpu) 1145 { 1146 ASSERT(MUTEX_HELD(&cpu_lock)); 1147 return (cpu_flagged_poweredoff(cpu->cpu_flags)); 1148 } 1149 1150 /* 1151 * Determine whether the CPU is handling interrupts. 1152 */ 1153 int 1154 cpu_is_nointr(cpu_t *cpu) 1155 { 1156 ASSERT(MUTEX_HELD(&cpu_lock)); 1157 return (cpu_flagged_nointr(cpu->cpu_flags)); 1158 } 1159 1160 /* 1161 * Determine whether the CPU is active (scheduling threads). 1162 */ 1163 int 1164 cpu_is_active(cpu_t *cpu) 1165 { 1166 ASSERT(MUTEX_HELD(&cpu_lock)); 1167 return (cpu_flagged_active(cpu->cpu_flags)); 1168 } 1169 1170 /* 1171 * Same as above, but these require cpu_flags instead of cpu_t pointers. 1172 */ 1173 int 1174 cpu_flagged_online(cpu_flag_t cpu_flags) 1175 { 1176 return (cpu_flagged_active(cpu_flags) && 1177 (cpu_flags & CPU_ENABLE)); 1178 } 1179 1180 int 1181 cpu_flagged_offline(cpu_flag_t cpu_flags) 1182 { 1183 return (((cpu_flags & CPU_POWEROFF) == 0) && 1184 ((cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY)); 1185 } 1186 1187 int 1188 cpu_flagged_poweredoff(cpu_flag_t cpu_flags) 1189 { 1190 return ((cpu_flags & CPU_POWEROFF) == CPU_POWEROFF); 1191 } 1192 1193 int 1194 cpu_flagged_nointr(cpu_flag_t cpu_flags) 1195 { 1196 return (cpu_flagged_active(cpu_flags) && 1197 (cpu_flags & CPU_ENABLE) == 0); 1198 } 1199 1200 int 1201 cpu_flagged_active(cpu_flag_t cpu_flags) 1202 { 1203 return (((cpu_flags & (CPU_POWEROFF | CPU_FAULTED | CPU_SPARE)) == 0) && 1204 ((cpu_flags & (CPU_READY | CPU_OFFLINE)) == CPU_READY)); 1205 } 1206 1207 /* 1208 * Bring the indicated CPU online. 1209 */ 1210 int 1211 cpu_online(cpu_t *cp) 1212 { 1213 int error = 0; 1214 1215 /* 1216 * Handle on-line request. 1217 * This code must put the new CPU on the active list before 1218 * starting it because it will not be paused, and will start 1219 * using the active list immediately. The real start occurs 1220 * when the CPU_QUIESCED flag is turned off. 1221 */ 1222 1223 ASSERT(MUTEX_HELD(&cpu_lock)); 1224 1225 /* 1226 * Put all the cpus into a known safe place. 1227 * No mutexes can be entered while CPUs are paused. 
1228 */ 1229 error = mp_cpu_start(cp); /* arch-dep hook */ 1230 if (error == 0) { 1231 pg_cpupart_in(cp, cp->cpu_part); 1232 pause_cpus(NULL, NULL); 1233 cpu_add_active_internal(cp); 1234 if (cp->cpu_flags & CPU_FAULTED) { 1235 cp->cpu_flags &= ~CPU_FAULTED; 1236 mp_cpu_faulted_exit(cp); 1237 } 1238 cp->cpu_flags &= ~(CPU_QUIESCED | CPU_OFFLINE | CPU_FROZEN | 1239 CPU_SPARE); 1240 CPU_NEW_GENERATION(cp); 1241 start_cpus(); 1242 cpu_stats_kstat_create(cp); 1243 cpu_create_intrstat(cp); 1244 lgrp_kstat_create(cp); 1245 cpu_state_change_notify(cp->cpu_id, CPU_ON); 1246 cpu_intr_enable(cp); /* arch-dep hook */ 1247 cpu_state_change_notify(cp->cpu_id, CPU_INTR_ON); 1248 cpu_set_state(cp); 1249 cyclic_online(cp); 1250 /* 1251 * This has to be called only after cyclic_online(). This 1252 * function uses cyclics. 1253 */ 1254 callout_cpu_online(cp); 1255 poke_cpu(cp->cpu_id); 1256 } 1257 1258 return (error); 1259 } 1260 1261 /* 1262 * Take the indicated CPU offline. 1263 */ 1264 int 1265 cpu_offline(cpu_t *cp, int flags) 1266 { 1267 cpupart_t *pp; 1268 int error = 0; 1269 cpu_t *ncp; 1270 int intr_enable; 1271 int cyclic_off = 0; 1272 int callout_off = 0; 1273 int loop_count; 1274 int no_quiesce = 0; 1275 int (*bound_func)(struct cpu *, int); 1276 kthread_t *t; 1277 lpl_t *cpu_lpl; 1278 proc_t *p; 1279 int lgrp_diff_lpl; 1280 boolean_t unbind_all_threads = (flags & CPU_FORCED) != 0; 1281 1282 ASSERT(MUTEX_HELD(&cpu_lock)); 1283 1284 /* 1285 * If we're going from faulted or spare to offline, just 1286 * clear these flags and update CPU state. 1287 */ 1288 if (cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) { 1289 if (cp->cpu_flags & CPU_FAULTED) { 1290 cp->cpu_flags &= ~CPU_FAULTED; 1291 mp_cpu_faulted_exit(cp); 1292 } 1293 cp->cpu_flags &= ~CPU_SPARE; 1294 cpu_set_state(cp); 1295 return (0); 1296 } 1297 1298 /* 1299 * Handle off-line request. 1300 */ 1301 pp = cp->cpu_part; 1302 /* 1303 * Don't offline last online CPU in partition 1304 */ 1305 if (ncpus_online <= 1 || pp->cp_ncpus <= 1 || cpu_intr_count(cp) < 2) 1306 return (EBUSY); 1307 /* 1308 * Unbind all soft-bound threads bound to our CPU and hard bound threads 1309 * if we were asked to. 1310 */ 1311 error = cpu_unbind(cp->cpu_id, unbind_all_threads); 1312 if (error != 0) 1313 return (error); 1314 /* 1315 * We shouldn't be bound to this CPU ourselves. 1316 */ 1317 if (curthread->t_bound_cpu == cp) 1318 return (EBUSY); 1319 1320 /* 1321 * Tell interested parties that this CPU is going offline. 1322 */ 1323 CPU_NEW_GENERATION(cp); 1324 cpu_state_change_notify(cp->cpu_id, CPU_OFF); 1325 1326 /* 1327 * Tell the PG subsystem that the CPU is leaving the partition 1328 */ 1329 pg_cpupart_out(cp, pp); 1330 1331 /* 1332 * Take the CPU out of interrupt participation so we won't find 1333 * bound kernel threads. If the architecture cannot completely 1334 * shut off interrupts on the CPU, don't quiesce it, but don't 1335 * run anything but interrupt thread... this is indicated by 1336 * the CPU_OFFLINE flag being on but the CPU_QUIESCE flag being 1337 * off. 1338 */ 1339 intr_enable = cp->cpu_flags & CPU_ENABLE; 1340 if (intr_enable) 1341 no_quiesce = cpu_intr_disable(cp); 1342 1343 /* 1344 * Record that we are aiming to offline this cpu. This acts as 1345 * a barrier to further weak binding requests in thread_nomigrate 1346 * and also causes cpu_choose, disp_lowpri_cpu and setfrontdq to 1347 * lean away from this cpu. Further strong bindings are already 1348 * avoided since we hold cpu_lock. 
	 * Since threads that are set runnable around now and others coming
	 * off the target cpu are directed away from the target, existing
	 * strong and weak bindings (especially the latter) to the target cpu
	 * stand maximum chance of being able to unbind during the short delay
	 * loop below (if other unbound threads compete they may not see cpu
	 * in time to unbind even if they would do so immediately).
	 */
	cpu_inmotion = cp;
	membar_enter();

	/*
	 * Check for kernel threads (strong or weak) bound to that CPU.
	 * Strongly bound threads may not unbind, and we'll have to return
	 * EBUSY.  Weakly bound threads should always disappear - we've
	 * stopped more weak binding with cpu_inmotion and existing
	 * bindings will drain imminently (they may not block).  Nonetheless
	 * we will wait for a fixed period for all bound threads to disappear.
	 * Inactive interrupt threads are OK (they'll be in TS_FREE
	 * state).  If the test finds some bound threads, wait a few ticks
	 * to give short-lived threads (such as interrupts) a chance to
	 * complete.  Note that if no_quiesce is set, i.e. this cpu
	 * is required to service interrupts, then we take the route
	 * that permits interrupt threads to be active (or bypassed).
	 */
	bound_func = no_quiesce ? disp_bound_threads : disp_bound_anythreads;

again:	for (loop_count = 0; (*bound_func)(cp, 0); loop_count++) {
		if (loop_count >= 5) {
			error = EBUSY;	/* some threads still bound */
			break;
		}

		/*
		 * If some threads were assigned, give them
		 * a chance to complete or move.
		 *
		 * This assumes that the clock_thread is not bound
		 * to any CPU, because the clock_thread is needed to
		 * do the delay(hz/100).
		 *
		 * Note: we still hold the cpu_lock while waiting for
		 * the next clock tick.  This is OK since it isn't
		 * needed for anything else except processor_bind(2),
		 * and system initialization.  If we drop the lock,
		 * we would risk another p_online disabling the last
		 * processor.
		 */
		delay(hz/100);
	}

	if (error == 0 && callout_off == 0) {
		callout_cpu_offline(cp);
		callout_off = 1;
	}

	if (error == 0 && cyclic_off == 0) {
		if (!cyclic_offline(cp)) {
			/*
			 * We must have bound cyclics...
			 */
			error = EBUSY;
			goto out;
		}
		cyclic_off = 1;
	}

	/*
	 * Call mp_cpu_stop() to perform any special operations
	 * needed for this machine architecture to offline a CPU.
	 */
	if (error == 0)
		error = mp_cpu_stop(cp);	/* arch-dep hook */

	/*
	 * If that all worked, take the CPU offline and decrement
	 * ncpus_online.
	 */
	if (error == 0) {
		/*
		 * Put all the cpus into a known safe place.
		 * No mutexes can be entered while CPUs are paused.
		 */
		pause_cpus(cp, NULL);
		/*
		 * Repeat the operation, if necessary, to make sure that
		 * all outstanding low-level interrupts run to completion
		 * before we set the CPU_QUIESCED flag.  It's also possible
		 * that a thread has become weakbound to the cpu despite our
		 * raising cpu_inmotion above, since it may have loaded that
		 * value before the barrier became visible (this would have
		 * to be the thread that was on the target cpu at the time
		 * we raised the barrier).
1441 */ 1442 if ((!no_quiesce && cp->cpu_intr_actv != 0) || 1443 (*bound_func)(cp, 1)) { 1444 start_cpus(); 1445 (void) mp_cpu_start(cp); 1446 goto again; 1447 } 1448 ncp = cp->cpu_next_part; 1449 cpu_lpl = cp->cpu_lpl; 1450 ASSERT(cpu_lpl != NULL); 1451 1452 /* 1453 * Remove the CPU from the list of active CPUs. 1454 */ 1455 cpu_remove_active(cp); 1456 1457 /* 1458 * Walk the active process list and look for threads 1459 * whose home lgroup needs to be updated, or 1460 * the last CPU they run on is the one being offlined now. 1461 */ 1462 1463 ASSERT(curthread->t_cpu != cp); 1464 for (p = practive; p != NULL; p = p->p_next) { 1465 1466 t = p->p_tlist; 1467 1468 if (t == NULL) 1469 continue; 1470 1471 lgrp_diff_lpl = 0; 1472 1473 do { 1474 ASSERT(t->t_lpl != NULL); 1475 /* 1476 * Taking last CPU in lpl offline 1477 * Rehome thread if it is in this lpl 1478 * Otherwise, update the count of how many 1479 * threads are in this CPU's lgroup but have 1480 * a different lpl. 1481 */ 1482 1483 if (cpu_lpl->lpl_ncpu == 0) { 1484 if (t->t_lpl == cpu_lpl) 1485 lgrp_move_thread(t, 1486 lgrp_choose(t, 1487 t->t_cpupart), 0); 1488 else if (t->t_lpl->lpl_lgrpid == 1489 cpu_lpl->lpl_lgrpid) 1490 lgrp_diff_lpl++; 1491 } 1492 ASSERT(t->t_lpl->lpl_ncpu > 0); 1493 1494 /* 1495 * Update CPU last ran on if it was this CPU 1496 */ 1497 if (t->t_cpu == cp && t->t_bound_cpu != cp) 1498 t->t_cpu = disp_lowpri_cpu(ncp, t, 1499 t->t_pri); 1500 ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp || 1501 t->t_weakbound_cpu == cp); 1502 1503 t = t->t_forw; 1504 } while (t != p->p_tlist); 1505 1506 /* 1507 * Didn't find any threads in the same lgroup as this 1508 * CPU with a different lpl, so remove the lgroup from 1509 * the process lgroup bitmask. 1510 */ 1511 1512 if (lgrp_diff_lpl == 0) 1513 klgrpset_del(p->p_lgrpset, cpu_lpl->lpl_lgrpid); 1514 } 1515 1516 /* 1517 * Walk thread list looking for threads that need to be 1518 * rehomed, since there are some threads that are not in 1519 * their process's p_tlist. 1520 */ 1521 1522 t = curthread; 1523 do { 1524 ASSERT(t != NULL && t->t_lpl != NULL); 1525 1526 /* 1527 * Rehome threads with same lpl as this CPU when this 1528 * is the last CPU in the lpl. 1529 */ 1530 1531 if ((cpu_lpl->lpl_ncpu == 0) && (t->t_lpl == cpu_lpl)) 1532 lgrp_move_thread(t, 1533 lgrp_choose(t, t->t_cpupart), 1); 1534 1535 ASSERT(t->t_lpl->lpl_ncpu > 0); 1536 1537 /* 1538 * Update CPU last ran on if it was this CPU 1539 */ 1540 1541 if (t->t_cpu == cp && t->t_bound_cpu != cp) 1542 t->t_cpu = disp_lowpri_cpu(ncp, t, t->t_pri); 1543 1544 ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp || 1545 t->t_weakbound_cpu == cp); 1546 t = t->t_next; 1547 1548 } while (t != curthread); 1549 ASSERT((cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) == 0); 1550 cp->cpu_flags |= CPU_OFFLINE; 1551 disp_cpu_inactive(cp); 1552 if (!no_quiesce) 1553 cp->cpu_flags |= CPU_QUIESCED; 1554 ncpus_online--; 1555 cpu_set_state(cp); 1556 cpu_inmotion = NULL; 1557 start_cpus(); 1558 cpu_stats_kstat_destroy(cp); 1559 cpu_delete_intrstat(cp); 1560 lgrp_kstat_destroy(cp); 1561 } 1562 1563 out: 1564 cpu_inmotion = NULL; 1565 1566 /* 1567 * If we failed, re-enable interrupts. 1568 * Do this even if cpu_intr_disable returned an error, because 1569 * it may have partially disabled interrupts. 1570 */ 1571 if (error && intr_enable) 1572 cpu_intr_enable(cp); 1573 1574 /* 1575 * If we failed, but managed to offline the cyclic subsystem on this 1576 * CPU, bring it back online. 
1577 */ 1578 if (error && cyclic_off) 1579 cyclic_online(cp); 1580 1581 /* 1582 * If we failed, but managed to offline callouts on this CPU, 1583 * bring it back online. 1584 */ 1585 if (error && callout_off) 1586 callout_cpu_online(cp); 1587 1588 /* 1589 * If we failed, tell the PG subsystem that the CPU is back 1590 */ 1591 pg_cpupart_in(cp, pp); 1592 1593 /* 1594 * If we failed, we need to notify everyone that this CPU is back on. 1595 */ 1596 if (error != 0) { 1597 CPU_NEW_GENERATION(cp); 1598 cpu_state_change_notify(cp->cpu_id, CPU_ON); 1599 cpu_state_change_notify(cp->cpu_id, CPU_INTR_ON); 1600 } 1601 1602 return (error); 1603 } 1604 1605 /* 1606 * Mark the indicated CPU as faulted, taking it offline. 1607 */ 1608 int 1609 cpu_faulted(cpu_t *cp, int flags) 1610 { 1611 int error = 0; 1612 1613 ASSERT(MUTEX_HELD(&cpu_lock)); 1614 ASSERT(!cpu_is_poweredoff(cp)); 1615 1616 if (cpu_is_offline(cp)) { 1617 cp->cpu_flags &= ~CPU_SPARE; 1618 cp->cpu_flags |= CPU_FAULTED; 1619 mp_cpu_faulted_enter(cp); 1620 cpu_set_state(cp); 1621 return (0); 1622 } 1623 1624 if ((error = cpu_offline(cp, flags)) == 0) { 1625 cp->cpu_flags |= CPU_FAULTED; 1626 mp_cpu_faulted_enter(cp); 1627 cpu_set_state(cp); 1628 } 1629 1630 return (error); 1631 } 1632 1633 /* 1634 * Mark the indicated CPU as a spare, taking it offline. 1635 */ 1636 int 1637 cpu_spare(cpu_t *cp, int flags) 1638 { 1639 int error = 0; 1640 1641 ASSERT(MUTEX_HELD(&cpu_lock)); 1642 ASSERT(!cpu_is_poweredoff(cp)); 1643 1644 if (cpu_is_offline(cp)) { 1645 if (cp->cpu_flags & CPU_FAULTED) { 1646 cp->cpu_flags &= ~CPU_FAULTED; 1647 mp_cpu_faulted_exit(cp); 1648 } 1649 cp->cpu_flags |= CPU_SPARE; 1650 cpu_set_state(cp); 1651 return (0); 1652 } 1653 1654 if ((error = cpu_offline(cp, flags)) == 0) { 1655 cp->cpu_flags |= CPU_SPARE; 1656 cpu_set_state(cp); 1657 } 1658 1659 return (error); 1660 } 1661 1662 /* 1663 * Take the indicated CPU from poweroff to offline. 1664 */ 1665 int 1666 cpu_poweron(cpu_t *cp) 1667 { 1668 int error = ENOTSUP; 1669 1670 ASSERT(MUTEX_HELD(&cpu_lock)); 1671 ASSERT(cpu_is_poweredoff(cp)); 1672 1673 error = mp_cpu_poweron(cp); /* arch-dep hook */ 1674 if (error == 0) 1675 cpu_set_state(cp); 1676 1677 return (error); 1678 } 1679 1680 /* 1681 * Take the indicated CPU from any inactive state to powered off. 1682 */ 1683 int 1684 cpu_poweroff(cpu_t *cp) 1685 { 1686 int error = ENOTSUP; 1687 1688 ASSERT(MUTEX_HELD(&cpu_lock)); 1689 ASSERT(cpu_is_offline(cp)); 1690 1691 if (!(cp->cpu_flags & CPU_QUIESCED)) 1692 return (EBUSY); /* not completely idle */ 1693 1694 error = mp_cpu_poweroff(cp); /* arch-dep hook */ 1695 if (error == 0) 1696 cpu_set_state(cp); 1697 1698 return (error); 1699 } 1700 1701 /* 1702 * Initialize the Sequential CPU id lookup table 1703 */ 1704 void 1705 cpu_seq_tbl_init() 1706 { 1707 cpu_t **tbl; 1708 1709 tbl = kmem_zalloc(sizeof (struct cpu *) * max_ncpus, KM_SLEEP); 1710 tbl[0] = CPU; 1711 1712 cpu_seq = tbl; 1713 } 1714 1715 /* 1716 * Initialize the CPU lists for the first CPU. 
 */
void
cpu_list_init(cpu_t *cp)
{
	cp->cpu_next = cp;
	cp->cpu_prev = cp;
	cpu_list = cp;
	clock_cpu_list = cp;

	cp->cpu_next_onln = cp;
	cp->cpu_prev_onln = cp;
	cpu_active = cp;

	cp->cpu_seqid = 0;
	CPUSET_ADD(cpu_seqid_inuse, 0);

	/*
	 * Bootstrap cpu_seq using cpu_list
	 * The cpu_seq[] table will be dynamically allocated
	 * when kmem later becomes available (but before going MP)
	 */
	cpu_seq = &cpu_list;

	cp->cpu_cache_offset = KMEM_CPU_CACHE_OFFSET(cp->cpu_seqid);
	cp_default.cp_cpulist = cp;
	cp_default.cp_ncpus = 1;
	cp->cpu_next_part = cp;
	cp->cpu_prev_part = cp;
	cp->cpu_part = &cp_default;

	CPUSET_ADD(cpu_available, cp->cpu_id);
}

/*
 * Insert a CPU into the list of available CPUs.
 */
void
cpu_add_unit(cpu_t *cp)
{
	int seqid;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_list != NULL);	/* list started in cpu_list_init */

	lgrp_config(LGRP_CONFIG_CPU_ADD, (uintptr_t)cp, 0);

	/*
	 * Note: most users of the cpu_list will grab the
	 * cpu_lock to ensure that it isn't modified.  However,
	 * certain users can't or won't do that.  To allow this
	 * we pause the other cpus.  Users who walk the list
	 * without cpu_lock must disable kernel preemption
	 * to ensure that the list isn't modified underneath
	 * them.  Also, any cached pointers to cpu structures
	 * must be revalidated by checking to see if the
	 * cpu_next pointer points to itself.  This check must
	 * be done with the cpu_lock held or kernel preemption
	 * disabled.  This check relies upon the fact that
	 * old cpu structures are not freed or cleared after
	 * they are removed from the cpu_list.
	 *
	 * Note that the clock code walks the cpu list dereferencing
	 * the cpu_part pointer, so we need to initialize it before
	 * adding the cpu to the list.
	 */
	cp->cpu_part = &cp_default;
	pause_cpus(NULL, NULL);
	cp->cpu_next = cpu_list;
	cp->cpu_prev = cpu_list->cpu_prev;
	cpu_list->cpu_prev->cpu_next = cp;
	cpu_list->cpu_prev = cp;
	start_cpus();

	for (seqid = 0; CPU_IN_SET(cpu_seqid_inuse, seqid); seqid++)
		continue;
	CPUSET_ADD(cpu_seqid_inuse, seqid);
	cp->cpu_seqid = seqid;

	if (seqid > max_cpu_seqid_ever)
		max_cpu_seqid_ever = seqid;

	ASSERT(ncpus < max_ncpus);
	ncpus++;
	cp->cpu_cache_offset = KMEM_CPU_CACHE_OFFSET(cp->cpu_seqid);
	cpu[cp->cpu_id] = cp;
	CPUSET_ADD(cpu_available, cp->cpu_id);
	cpu_seq[cp->cpu_seqid] = cp;

	/*
	 * Allocate a pause thread for this CPU.
	 */
	cpu_pause_alloc(cp);

	/*
	 * So that new CPUs won't have NULL prev_onln and next_onln pointers,
	 * link them into a list of just that CPU.
	 * This is so that disp_lowpri_cpu will work for thread_create in
	 * pause_cpus() when called from the startup thread in a new CPU.
	 */
	cp->cpu_next_onln = cp;
	cp->cpu_prev_onln = cp;
	cpu_info_kstat_create(cp);
	cp->cpu_next_part = cp;
	cp->cpu_prev_part = cp;

	init_cpu_mstate(cp, CMS_SYSTEM);

	pool_pset_mod = gethrtime();
}

/*
 * Do the opposite of cpu_add_unit().
1829 */ 1830 void 1831 cpu_del_unit(int cpuid) 1832 { 1833 struct cpu *cp, *cpnext; 1834 1835 ASSERT(MUTEX_HELD(&cpu_lock)); 1836 cp = cpu[cpuid]; 1837 ASSERT(cp != NULL); 1838 1839 ASSERT(cp->cpu_next_onln == cp); 1840 ASSERT(cp->cpu_prev_onln == cp); 1841 ASSERT(cp->cpu_next_part == cp); 1842 ASSERT(cp->cpu_prev_part == cp); 1843 1844 /* 1845 * Tear down the CPU's physical ID cache, and update any 1846 * processor groups 1847 */ 1848 pg_cpu_fini(cp, NULL); 1849 pghw_physid_destroy(cp); 1850 1851 /* 1852 * Destroy kstat stuff. 1853 */ 1854 cpu_info_kstat_destroy(cp); 1855 term_cpu_mstate(cp); 1856 /* 1857 * Free up pause thread. 1858 */ 1859 cpu_pause_free(cp); 1860 CPUSET_DEL(cpu_available, cp->cpu_id); 1861 cpu[cp->cpu_id] = NULL; 1862 cpu_seq[cp->cpu_seqid] = NULL; 1863 1864 /* 1865 * The clock thread and mutex_vector_enter cannot hold the 1866 * cpu_lock while traversing the cpu list, therefore we pause 1867 * all other threads by pausing the other cpus. These, and any 1868 * other routines holding cpu pointers while possibly sleeping 1869 * must be sure to call kpreempt_disable before processing the 1870 * list and be sure to check that the cpu has not been deleted 1871 * after any sleeps (check cp->cpu_next != NULL). We guarantee 1872 * to keep the deleted cpu structure around. 1873 * 1874 * Note that this MUST be done AFTER cpu_available 1875 * has been updated so that we don't waste time 1876 * trying to pause the cpu we're trying to delete. 1877 */ 1878 pause_cpus(NULL, NULL); 1879 1880 cpnext = cp->cpu_next; 1881 cp->cpu_prev->cpu_next = cp->cpu_next; 1882 cp->cpu_next->cpu_prev = cp->cpu_prev; 1883 if (cp == cpu_list) 1884 cpu_list = cpnext; 1885 1886 /* 1887 * Signals that the cpu has been deleted (see above). 1888 */ 1889 cp->cpu_next = NULL; 1890 cp->cpu_prev = NULL; 1891 1892 start_cpus(); 1893 1894 CPUSET_DEL(cpu_seqid_inuse, cp->cpu_seqid); 1895 ncpus--; 1896 lgrp_config(LGRP_CONFIG_CPU_DEL, (uintptr_t)cp, 0); 1897 1898 pool_pset_mod = gethrtime(); 1899 } 1900 1901 /* 1902 * Add a CPU to the list of active CPUs. 1903 * This routine must not get any locks, because other CPUs are paused. 1904 */ 1905 static void 1906 cpu_add_active_internal(cpu_t *cp) 1907 { 1908 cpupart_t *pp = cp->cpu_part; 1909 1910 ASSERT(MUTEX_HELD(&cpu_lock)); 1911 ASSERT(cpu_list != NULL); /* list started in cpu_list_init */ 1912 1913 ncpus_online++; 1914 cpu_set_state(cp); 1915 cp->cpu_next_onln = cpu_active; 1916 cp->cpu_prev_onln = cpu_active->cpu_prev_onln; 1917 cpu_active->cpu_prev_onln->cpu_next_onln = cp; 1918 cpu_active->cpu_prev_onln = cp; 1919 1920 if (pp->cp_cpulist) { 1921 cp->cpu_next_part = pp->cp_cpulist; 1922 cp->cpu_prev_part = pp->cp_cpulist->cpu_prev_part; 1923 pp->cp_cpulist->cpu_prev_part->cpu_next_part = cp; 1924 pp->cp_cpulist->cpu_prev_part = cp; 1925 } else { 1926 ASSERT(pp->cp_ncpus == 0); 1927 pp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp; 1928 } 1929 pp->cp_ncpus++; 1930 if (pp->cp_ncpus == 1) { 1931 cp_numparts_nonempty++; 1932 ASSERT(cp_numparts_nonempty != 0); 1933 } 1934 1935 pg_cpu_active(cp); 1936 lgrp_config(LGRP_CONFIG_CPU_ONLINE, (uintptr_t)cp, 0); 1937 1938 bzero(&cp->cpu_loadavg, sizeof (cp->cpu_loadavg)); 1939 } 1940 1941 /* 1942 * Add a CPU to the list of active CPUs. 1943 * This is called from machine-dependent layers when a new CPU is started. 
1944 */ 1945 void 1946 cpu_add_active(cpu_t *cp) 1947 { 1948 pg_cpupart_in(cp, cp->cpu_part); 1949 1950 pause_cpus(NULL, NULL); 1951 cpu_add_active_internal(cp); 1952 start_cpus(); 1953 1954 cpu_stats_kstat_create(cp); 1955 cpu_create_intrstat(cp); 1956 lgrp_kstat_create(cp); 1957 cpu_state_change_notify(cp->cpu_id, CPU_INIT); 1958 } 1959 1960 1961 /* 1962 * Remove a CPU from the list of active CPUs. 1963 * This routine must not get any locks, because other CPUs are paused. 1964 */ 1965 /* ARGSUSED */ 1966 static void 1967 cpu_remove_active(cpu_t *cp) 1968 { 1969 cpupart_t *pp = cp->cpu_part; 1970 1971 ASSERT(MUTEX_HELD(&cpu_lock)); 1972 ASSERT(cp->cpu_next_onln != cp); /* not the last one */ 1973 ASSERT(cp->cpu_prev_onln != cp); /* not the last one */ 1974 1975 pg_cpu_inactive(cp); 1976 1977 lgrp_config(LGRP_CONFIG_CPU_OFFLINE, (uintptr_t)cp, 0); 1978 1979 if (cp == clock_cpu_list) 1980 clock_cpu_list = cp->cpu_next_onln; 1981 1982 cp->cpu_prev_onln->cpu_next_onln = cp->cpu_next_onln; 1983 cp->cpu_next_onln->cpu_prev_onln = cp->cpu_prev_onln; 1984 if (cpu_active == cp) { 1985 cpu_active = cp->cpu_next_onln; 1986 } 1987 cp->cpu_next_onln = cp; 1988 cp->cpu_prev_onln = cp; 1989 1990 cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part; 1991 cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part; 1992 if (pp->cp_cpulist == cp) { 1993 pp->cp_cpulist = cp->cpu_next_part; 1994 ASSERT(pp->cp_cpulist != cp); 1995 } 1996 cp->cpu_next_part = cp; 1997 cp->cpu_prev_part = cp; 1998 pp->cp_ncpus--; 1999 if (pp->cp_ncpus == 0) { 2000 cp_numparts_nonempty--; 2001 ASSERT(cp_numparts_nonempty != 0); 2002 } 2003 } 2004 2005 /* 2006 * Routine used to setup a newly inserted CPU in preparation for starting 2007 * it running code. 2008 */ 2009 int 2010 cpu_configure(int cpuid) 2011 { 2012 int retval = 0; 2013 2014 ASSERT(MUTEX_HELD(&cpu_lock)); 2015 2016 /* 2017 * Some structures are statically allocated based upon 2018 * the maximum number of cpus the system supports. Do not 2019 * try to add anything beyond this limit. 2020 */ 2021 if (cpuid < 0 || cpuid >= NCPU) { 2022 return (EINVAL); 2023 } 2024 2025 if ((cpu[cpuid] != NULL) && (cpu[cpuid]->cpu_flags != 0)) { 2026 return (EALREADY); 2027 } 2028 2029 if ((retval = mp_cpu_configure(cpuid)) != 0) { 2030 return (retval); 2031 } 2032 2033 cpu[cpuid]->cpu_flags = CPU_QUIESCED | CPU_OFFLINE | CPU_POWEROFF; 2034 cpu_set_state(cpu[cpuid]); 2035 retval = cpu_state_change_hooks(cpuid, CPU_CONFIG, CPU_UNCONFIG); 2036 if (retval != 0) 2037 (void) mp_cpu_unconfigure(cpuid); 2038 2039 return (retval); 2040 } 2041 2042 /* 2043 * Routine used to cleanup a CPU that has been powered off. This will 2044 * destroy all per-cpu information related to this cpu. 2045 */ 2046 int 2047 cpu_unconfigure(int cpuid) 2048 { 2049 int error; 2050 2051 ASSERT(MUTEX_HELD(&cpu_lock)); 2052 2053 if (cpu[cpuid] == NULL) { 2054 return (ENODEV); 2055 } 2056 2057 if (cpu[cpuid]->cpu_flags == 0) { 2058 return (EALREADY); 2059 } 2060 2061 if ((cpu[cpuid]->cpu_flags & CPU_POWEROFF) == 0) { 2062 return (EBUSY); 2063 } 2064 2065 if (cpu[cpuid]->cpu_props != NULL) { 2066 (void) nvlist_free(cpu[cpuid]->cpu_props); 2067 cpu[cpuid]->cpu_props = NULL; 2068 } 2069 2070 error = cpu_state_change_hooks(cpuid, CPU_UNCONFIG, CPU_CONFIG); 2071 2072 if (error != 0) 2073 return (error); 2074 2075 return (mp_cpu_unconfigure(cpuid)); 2076 } 2077 2078 /* 2079 * Routines for registering and de-registering cpu_setup callback functions. 
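 *
 * A typical consumer registers its callback while holding cpu_lock and
 * handles the events it cares about.  A minimal sketch (the xx_* names
 * are hypothetical, not part of this file):
 *
 *	static int
 *	xx_cpu_setup(cpu_setup_t what, int id, void *arg)
 *	{
 *		switch (what) {
 *		case CPU_CONFIG:
 *		case CPU_ON:
 *			(set up or enable per-CPU state for cpu "id")
 *			break;
 *		case CPU_UNCONFIG:
 *		case CPU_OFF:
 *			(quiesce per-CPU state for cpu "id")
 *			break;
 *		default:
 *			break;
 *		}
 *		return (0);
 *	}
 *
 *	mutex_enter(&cpu_lock);
 *	register_cpu_setup_func(xx_cpu_setup, NULL);
 *	mutex_exit(&cpu_lock);
 *
 * A non-zero return from a callback makes cpu_state_change_hooks() undo
 * the state change by replaying the inverse event to the callbacks that
 * had already succeeded.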
2080 * 2081 * Caller's context 2082 * These routines must not be called from a driver's attach(9E) or 2083 * detach(9E) entry point. 2084 * 2085 * NOTE: CPU callbacks should not block. They are called with cpu_lock held. 2086 */ 2087 2088 /* 2089 * Ideally, these would be dynamically allocated and put into a linked 2090 * list; however that is not feasible because the registration routine 2091 * has to be available before the kmem allocator is working (in fact, 2092 * it is called by the kmem allocator init code). In any case, there 2093 * are quite a few extra entries for future users. 2094 */ 2095 #define NCPU_SETUPS 20 2096 2097 struct cpu_setup { 2098 cpu_setup_func_t *func; 2099 void *arg; 2100 } cpu_setups[NCPU_SETUPS]; 2101 2102 void 2103 register_cpu_setup_func(cpu_setup_func_t *func, void *arg) 2104 { 2105 int i; 2106 2107 ASSERT(MUTEX_HELD(&cpu_lock)); 2108 2109 for (i = 0; i < NCPU_SETUPS; i++) 2110 if (cpu_setups[i].func == NULL) 2111 break; 2112 if (i >= NCPU_SETUPS) 2113 cmn_err(CE_PANIC, "Ran out of cpu_setup callback entries"); 2114 2115 cpu_setups[i].func = func; 2116 cpu_setups[i].arg = arg; 2117 } 2118 2119 void 2120 unregister_cpu_setup_func(cpu_setup_func_t *func, void *arg) 2121 { 2122 int i; 2123 2124 ASSERT(MUTEX_HELD(&cpu_lock)); 2125 2126 for (i = 0; i < NCPU_SETUPS; i++) 2127 if ((cpu_setups[i].func == func) && 2128 (cpu_setups[i].arg == arg)) 2129 break; 2130 if (i >= NCPU_SETUPS) 2131 cmn_err(CE_PANIC, "Could not find cpu_setup callback to " 2132 "deregister"); 2133 2134 cpu_setups[i].func = NULL; 2135 cpu_setups[i].arg = 0; 2136 } 2137 2138 /* 2139 * Call any state change hooks for this CPU, ignore any errors. 2140 */ 2141 void 2142 cpu_state_change_notify(int id, cpu_setup_t what) 2143 { 2144 int i; 2145 2146 ASSERT(MUTEX_HELD(&cpu_lock)); 2147 2148 for (i = 0; i < NCPU_SETUPS; i++) { 2149 if (cpu_setups[i].func != NULL) { 2150 cpu_setups[i].func(what, id, cpu_setups[i].arg); 2151 } 2152 } 2153 } 2154 2155 /* 2156 * Call any state change hooks for this CPU, undo it if error found. 2157 */ 2158 static int 2159 cpu_state_change_hooks(int id, cpu_setup_t what, cpu_setup_t undo) 2160 { 2161 int i; 2162 int retval = 0; 2163 2164 ASSERT(MUTEX_HELD(&cpu_lock)); 2165 2166 for (i = 0; i < NCPU_SETUPS; i++) { 2167 if (cpu_setups[i].func != NULL) { 2168 retval = cpu_setups[i].func(what, id, 2169 cpu_setups[i].arg); 2170 if (retval) { 2171 for (i--; i >= 0; i--) { 2172 if (cpu_setups[i].func != NULL) 2173 cpu_setups[i].func(undo, 2174 id, cpu_setups[i].arg); 2175 } 2176 break; 2177 } 2178 } 2179 } 2180 return (retval); 2181 } 2182 2183 /* 2184 * Export information about this CPU via the kstat mechanism. 
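 *
 * Userland reads this data through libkstat(3LIB); a minimal consumer
 * sketch (error handling omitted):
 *
 *	#include <kstat.h>
 *	#include <stdio.h>
 *
 *	kstat_ctl_t *kc = kstat_open();
 *	kstat_t *ksp = kstat_lookup(kc, "cpu_info", 0, NULL);
 *	kstat_named_t *knp;
 *
 *	(void) kstat_read(kc, ksp, NULL);
 *	knp = kstat_data_lookup(ksp, "state");
 *	(void) printf("cpu 0 is %s\n", knp->value.c);
 *	(void) kstat_close(kc);
 *
 * or, from the shell, "kstat -m cpu_info -i 0".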
2185 */ 2186 static struct { 2187 kstat_named_t ci_state; 2188 kstat_named_t ci_state_begin; 2189 kstat_named_t ci_cpu_type; 2190 kstat_named_t ci_fpu_type; 2191 kstat_named_t ci_clock_MHz; 2192 kstat_named_t ci_chip_id; 2193 kstat_named_t ci_implementation; 2194 kstat_named_t ci_brandstr; 2195 kstat_named_t ci_core_id; 2196 kstat_named_t ci_curr_clock_Hz; 2197 kstat_named_t ci_supp_freq_Hz; 2198 kstat_named_t ci_pg_id; 2199 #if defined(__sparcv9) 2200 kstat_named_t ci_device_ID; 2201 kstat_named_t ci_cpu_fru; 2202 #endif 2203 #if defined(__x86) 2204 kstat_named_t ci_vendorstr; 2205 kstat_named_t ci_family; 2206 kstat_named_t ci_model; 2207 kstat_named_t ci_step; 2208 kstat_named_t ci_clogid; 2209 kstat_named_t ci_pkg_core_id; 2210 kstat_named_t ci_ncpuperchip; 2211 kstat_named_t ci_ncoreperchip; 2212 kstat_named_t ci_max_cstates; 2213 kstat_named_t ci_curr_cstate; 2214 kstat_named_t ci_cacheid; 2215 kstat_named_t ci_sktstr; 2216 #endif 2217 } cpu_info_template = { 2218 { "state", KSTAT_DATA_CHAR }, 2219 { "state_begin", KSTAT_DATA_LONG }, 2220 { "cpu_type", KSTAT_DATA_CHAR }, 2221 { "fpu_type", KSTAT_DATA_CHAR }, 2222 { "clock_MHz", KSTAT_DATA_LONG }, 2223 { "chip_id", KSTAT_DATA_LONG }, 2224 { "implementation", KSTAT_DATA_STRING }, 2225 { "brand", KSTAT_DATA_STRING }, 2226 { "core_id", KSTAT_DATA_LONG }, 2227 { "current_clock_Hz", KSTAT_DATA_UINT64 }, 2228 { "supported_frequencies_Hz", KSTAT_DATA_STRING }, 2229 { "pg_id", KSTAT_DATA_LONG }, 2230 #if defined(__sparcv9) 2231 { "device_ID", KSTAT_DATA_UINT64 }, 2232 { "cpu_fru", KSTAT_DATA_STRING }, 2233 #endif 2234 #if defined(__x86) 2235 { "vendor_id", KSTAT_DATA_STRING }, 2236 { "family", KSTAT_DATA_INT32 }, 2237 { "model", KSTAT_DATA_INT32 }, 2238 { "stepping", KSTAT_DATA_INT32 }, 2239 { "clog_id", KSTAT_DATA_INT32 }, 2240 { "pkg_core_id", KSTAT_DATA_LONG }, 2241 { "ncpu_per_chip", KSTAT_DATA_INT32 }, 2242 { "ncore_per_chip", KSTAT_DATA_INT32 }, 2243 { "supported_max_cstates", KSTAT_DATA_INT32 }, 2244 { "current_cstate", KSTAT_DATA_INT32 }, 2245 { "cache_id", KSTAT_DATA_INT32 }, 2246 { "socket_type", KSTAT_DATA_STRING }, 2247 #endif 2248 }; 2249 2250 static kmutex_t cpu_info_template_lock; 2251 2252 static int 2253 cpu_info_kstat_update(kstat_t *ksp, int rw) 2254 { 2255 cpu_t *cp = ksp->ks_private; 2256 const char *pi_state; 2257 2258 if (rw == KSTAT_WRITE) 2259 return (EACCES); 2260 2261 #if defined(__x86) 2262 /* Is the cpu still initialising itself? 
*/ 2263 if (cpuid_checkpass(cp, 1) == 0) 2264 return (ENXIO); 2265 #endif 2266 switch (cp->cpu_type_info.pi_state) { 2267 case P_ONLINE: 2268 pi_state = PS_ONLINE; 2269 break; 2270 case P_POWEROFF: 2271 pi_state = PS_POWEROFF; 2272 break; 2273 case P_NOINTR: 2274 pi_state = PS_NOINTR; 2275 break; 2276 case P_FAULTED: 2277 pi_state = PS_FAULTED; 2278 break; 2279 case P_SPARE: 2280 pi_state = PS_SPARE; 2281 break; 2282 case P_OFFLINE: 2283 pi_state = PS_OFFLINE; 2284 break; 2285 default: 2286 pi_state = "unknown"; 2287 } 2288 (void) strcpy(cpu_info_template.ci_state.value.c, pi_state); 2289 cpu_info_template.ci_state_begin.value.l = cp->cpu_state_begin; 2290 (void) strncpy(cpu_info_template.ci_cpu_type.value.c, 2291 cp->cpu_type_info.pi_processor_type, 15); 2292 (void) strncpy(cpu_info_template.ci_fpu_type.value.c, 2293 cp->cpu_type_info.pi_fputypes, 15); 2294 cpu_info_template.ci_clock_MHz.value.l = cp->cpu_type_info.pi_clock; 2295 cpu_info_template.ci_chip_id.value.l = 2296 pg_plat_hw_instance_id(cp, PGHW_CHIP); 2297 kstat_named_setstr(&cpu_info_template.ci_implementation, 2298 cp->cpu_idstr); 2299 kstat_named_setstr(&cpu_info_template.ci_brandstr, cp->cpu_brandstr); 2300 cpu_info_template.ci_core_id.value.l = pg_plat_get_core_id(cp); 2301 cpu_info_template.ci_curr_clock_Hz.value.ui64 = 2302 cp->cpu_curr_clock; 2303 cpu_info_template.ci_pg_id.value.l = 2304 cp->cpu_pg && cp->cpu_pg->cmt_lineage ? 2305 cp->cpu_pg->cmt_lineage->pg_id : -1; 2306 kstat_named_setstr(&cpu_info_template.ci_supp_freq_Hz, 2307 cp->cpu_supp_freqs); 2308 #if defined(__sparcv9) 2309 cpu_info_template.ci_device_ID.value.ui64 = 2310 cpunodes[cp->cpu_id].device_id; 2311 kstat_named_setstr(&cpu_info_template.ci_cpu_fru, cpu_fru_fmri(cp)); 2312 #endif 2313 #if defined(__x86) 2314 kstat_named_setstr(&cpu_info_template.ci_vendorstr, 2315 cpuid_getvendorstr(cp)); 2316 cpu_info_template.ci_family.value.l = cpuid_getfamily(cp); 2317 cpu_info_template.ci_model.value.l = cpuid_getmodel(cp); 2318 cpu_info_template.ci_step.value.l = cpuid_getstep(cp); 2319 cpu_info_template.ci_clogid.value.l = cpuid_get_clogid(cp); 2320 cpu_info_template.ci_ncpuperchip.value.l = cpuid_get_ncpu_per_chip(cp); 2321 cpu_info_template.ci_ncoreperchip.value.l = 2322 cpuid_get_ncore_per_chip(cp); 2323 cpu_info_template.ci_pkg_core_id.value.l = cpuid_get_pkgcoreid(cp); 2324 cpu_info_template.ci_max_cstates.value.l = cp->cpu_m.max_cstates; 2325 cpu_info_template.ci_curr_cstate.value.l = cpu_idle_get_cpu_state(cp); 2326 cpu_info_template.ci_cacheid.value.i32 = cpuid_get_cacheid(cp); 2327 kstat_named_setstr(&cpu_info_template.ci_sktstr, 2328 cpuid_getsocketstr(cp)); 2329 #endif 2330 2331 return (0); 2332 } 2333 2334 static void 2335 cpu_info_kstat_create(cpu_t *cp) 2336 { 2337 zoneid_t zoneid; 2338 2339 ASSERT(MUTEX_HELD(&cpu_lock)); 2340 2341 if (pool_pset_enabled()) 2342 zoneid = GLOBAL_ZONEID; 2343 else 2344 zoneid = ALL_ZONES; 2345 if ((cp->cpu_info_kstat = kstat_create_zone("cpu_info", cp->cpu_id, 2346 NULL, "misc", KSTAT_TYPE_NAMED, 2347 sizeof (cpu_info_template) / sizeof (kstat_named_t), 2348 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE, zoneid)) != NULL) { 2349 cp->cpu_info_kstat->ks_data_size += 2 * CPU_IDSTRLEN; 2350 #if defined(__sparcv9) 2351 cp->cpu_info_kstat->ks_data_size += 2352 strlen(cpu_fru_fmri(cp)) + 1; 2353 #endif 2354 #if defined(__x86) 2355 cp->cpu_info_kstat->ks_data_size += X86_VENDOR_STRLEN; 2356 #endif 2357 if (cp->cpu_supp_freqs != NULL) 2358 cp->cpu_info_kstat->ks_data_size += 2359 strlen(cp->cpu_supp_freqs) + 1; 2360 
cp->cpu_info_kstat->ks_lock = &cpu_info_template_lock; 2361 cp->cpu_info_kstat->ks_data = &cpu_info_template; 2362 cp->cpu_info_kstat->ks_private = cp; 2363 cp->cpu_info_kstat->ks_update = cpu_info_kstat_update; 2364 kstat_install(cp->cpu_info_kstat); 2365 } 2366 } 2367 2368 static void 2369 cpu_info_kstat_destroy(cpu_t *cp) 2370 { 2371 ASSERT(MUTEX_HELD(&cpu_lock)); 2372 2373 kstat_delete(cp->cpu_info_kstat); 2374 cp->cpu_info_kstat = NULL; 2375 } 2376 2377 /* 2378 * Create and install kstats for the boot CPU. 2379 */ 2380 void 2381 cpu_kstat_init(cpu_t *cp) 2382 { 2383 mutex_enter(&cpu_lock); 2384 cpu_info_kstat_create(cp); 2385 cpu_stats_kstat_create(cp); 2386 cpu_create_intrstat(cp); 2387 cpu_set_state(cp); 2388 mutex_exit(&cpu_lock); 2389 } 2390 2391 /* 2392 * Make visible to the zone that subset of the cpu information that would be 2393 * initialized when a cpu is configured (but still offline). 2394 */ 2395 void 2396 cpu_visibility_configure(cpu_t *cp, zone_t *zone) 2397 { 2398 zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES; 2399 2400 ASSERT(MUTEX_HELD(&cpu_lock)); 2401 ASSERT(pool_pset_enabled()); 2402 ASSERT(cp != NULL); 2403 2404 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) { 2405 zone->zone_ncpus++; 2406 ASSERT(zone->zone_ncpus <= ncpus); 2407 } 2408 if (cp->cpu_info_kstat != NULL) 2409 kstat_zone_add(cp->cpu_info_kstat, zoneid); 2410 } 2411 2412 /* 2413 * Make visible to the zone that subset of the cpu information that would be 2414 * initialized when a previously configured cpu is onlined. 2415 */ 2416 void 2417 cpu_visibility_online(cpu_t *cp, zone_t *zone) 2418 { 2419 kstat_t *ksp; 2420 char name[sizeof ("cpu_stat") + 10]; /* enough for 32-bit cpuids */ 2421 zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES; 2422 processorid_t cpun; 2423 2424 ASSERT(MUTEX_HELD(&cpu_lock)); 2425 ASSERT(pool_pset_enabled()); 2426 ASSERT(cp != NULL); 2427 ASSERT(cpu_is_active(cp)); 2428 2429 cpun = cp->cpu_id; 2430 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) { 2431 zone->zone_ncpus_online++; 2432 ASSERT(zone->zone_ncpus_online <= ncpus_online); 2433 } 2434 (void) snprintf(name, sizeof (name), "cpu_stat%d", cpun); 2435 if ((ksp = kstat_hold_byname("cpu_stat", cpun, name, ALL_ZONES)) 2436 != NULL) { 2437 kstat_zone_add(ksp, zoneid); 2438 kstat_rele(ksp); 2439 } 2440 if ((ksp = kstat_hold_byname("cpu", cpun, "sys", ALL_ZONES)) != NULL) { 2441 kstat_zone_add(ksp, zoneid); 2442 kstat_rele(ksp); 2443 } 2444 if ((ksp = kstat_hold_byname("cpu", cpun, "vm", ALL_ZONES)) != NULL) { 2445 kstat_zone_add(ksp, zoneid); 2446 kstat_rele(ksp); 2447 } 2448 if ((ksp = kstat_hold_byname("cpu", cpun, "intrstat", ALL_ZONES)) != 2449 NULL) { 2450 kstat_zone_add(ksp, zoneid); 2451 kstat_rele(ksp); 2452 } 2453 } 2454 2455 /* 2456 * Update relevant kstats such that cpu is now visible to processes 2457 * executing in specified zone. 2458 */ 2459 void 2460 cpu_visibility_add(cpu_t *cp, zone_t *zone) 2461 { 2462 cpu_visibility_configure(cp, zone); 2463 if (cpu_is_active(cp)) 2464 cpu_visibility_online(cp, zone); 2465 } 2466 2467 /* 2468 * Make invisible to the zone that subset of the cpu information that would be 2469 * torn down when a previously offlined cpu is unconfigured. 2470 */ 2471 void 2472 cpu_visibility_unconfigure(cpu_t *cp, zone_t *zone) 2473 { 2474 zoneid_t zoneid = zone ? 
zone->zone_id : ALL_ZONES; 2475 2476 ASSERT(MUTEX_HELD(&cpu_lock)); 2477 ASSERT(pool_pset_enabled()); 2478 ASSERT(cp != NULL); 2479 2480 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) { 2481 ASSERT(zone->zone_ncpus != 0); 2482 zone->zone_ncpus--; 2483 } 2484 if (cp->cpu_info_kstat) 2485 kstat_zone_remove(cp->cpu_info_kstat, zoneid); 2486 } 2487 2488 /* 2489 * Make invisible to the zone that subset of the cpu information that would be 2490 * torn down when a cpu is offlined (but still configured). 2491 */ 2492 void 2493 cpu_visibility_offline(cpu_t *cp, zone_t *zone) 2494 { 2495 kstat_t *ksp; 2496 char name[sizeof ("cpu_stat") + 10]; /* enough for 32-bit cpuids */ 2497 zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES; 2498 processorid_t cpun; 2499 2500 ASSERT(MUTEX_HELD(&cpu_lock)); 2501 ASSERT(pool_pset_enabled()); 2502 ASSERT(cp != NULL); 2503 ASSERT(cpu_is_active(cp)); 2504 2505 cpun = cp->cpu_id; 2506 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) { 2507 ASSERT(zone->zone_ncpus_online != 0); 2508 zone->zone_ncpus_online--; 2509 } 2510 2511 if ((ksp = kstat_hold_byname("cpu", cpun, "intrstat", ALL_ZONES)) != 2512 NULL) { 2513 kstat_zone_remove(ksp, zoneid); 2514 kstat_rele(ksp); 2515 } 2516 if ((ksp = kstat_hold_byname("cpu", cpun, "vm", ALL_ZONES)) != NULL) { 2517 kstat_zone_remove(ksp, zoneid); 2518 kstat_rele(ksp); 2519 } 2520 if ((ksp = kstat_hold_byname("cpu", cpun, "sys", ALL_ZONES)) != NULL) { 2521 kstat_zone_remove(ksp, zoneid); 2522 kstat_rele(ksp); 2523 } 2524 (void) snprintf(name, sizeof (name), "cpu_stat%d", cpun); 2525 if ((ksp = kstat_hold_byname("cpu_stat", cpun, name, ALL_ZONES)) 2526 != NULL) { 2527 kstat_zone_remove(ksp, zoneid); 2528 kstat_rele(ksp); 2529 } 2530 } 2531 2532 /* 2533 * Update relevant kstats such that cpu is no longer visible to processes 2534 * executing in specified zone. 2535 */ 2536 void 2537 cpu_visibility_remove(cpu_t *cp, zone_t *zone) 2538 { 2539 if (cpu_is_active(cp)) 2540 cpu_visibility_offline(cp, zone); 2541 cpu_visibility_unconfigure(cp, zone); 2542 } 2543 2544 /* 2545 * Bind a thread to a CPU as requested. 2546 */ 2547 int 2548 cpu_bind_thread(kthread_id_t tp, processorid_t bind, processorid_t *obind, 2549 int *error) 2550 { 2551 processorid_t binding; 2552 cpu_t *cp = NULL; 2553 2554 ASSERT(MUTEX_HELD(&cpu_lock)); 2555 ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock)); 2556 2557 thread_lock(tp); 2558 2559 /* 2560 * Record old binding, but change the obind, which was initialized 2561 * to PBIND_NONE, only if this thread has a binding. This avoids 2562 * reporting PBIND_NONE for a process when some LWPs are bound. 2563 */ 2564 binding = tp->t_bind_cpu; 2565 if (binding != PBIND_NONE) 2566 *obind = binding; /* record old binding */ 2567 2568 switch (bind) { 2569 case PBIND_QUERY: 2570 /* Just return the old binding */ 2571 thread_unlock(tp); 2572 return (0); 2573 2574 case PBIND_QUERY_TYPE: 2575 /* Return the binding type */ 2576 *obind = TB_CPU_IS_SOFT(tp) ? 
PBIND_SOFT : PBIND_HARD; 2577 thread_unlock(tp); 2578 return (0); 2579 2580 case PBIND_SOFT: 2581 /* 2582 * Set soft binding for this thread and return the actual 2583 * binding 2584 */ 2585 TB_CPU_SOFT_SET(tp); 2586 thread_unlock(tp); 2587 return (0); 2588 2589 case PBIND_HARD: 2590 /* 2591 * Set hard binding for this thread and return the actual 2592 * binding 2593 */ 2594 TB_CPU_HARD_SET(tp); 2595 thread_unlock(tp); 2596 return (0); 2597 2598 default: 2599 break; 2600 } 2601 2602 /* 2603 * If this thread/LWP cannot be bound because of permission 2604 * problems, just note that and return success so that the 2605 * other threads/LWPs will be bound. This is the way 2606 * processor_bind() is defined to work. 2607 * 2608 * Binding will get EPERM if the thread is of system class 2609 * or hasprocperm() fails. 2610 */ 2611 if (tp->t_cid == 0 || !hasprocperm(tp->t_cred, CRED())) { 2612 *error = EPERM; 2613 thread_unlock(tp); 2614 return (0); 2615 } 2616 2617 binding = bind; 2618 if (binding != PBIND_NONE) { 2619 cp = cpu_get((processorid_t)binding); 2620 /* 2621 * Make sure binding is valid and is in right partition. 2622 */ 2623 if (cp == NULL || tp->t_cpupart != cp->cpu_part) { 2624 *error = EINVAL; 2625 thread_unlock(tp); 2626 return (0); 2627 } 2628 } 2629 tp->t_bind_cpu = binding; /* set new binding */ 2630 2631 /* 2632 * If there is no system-set reason for affinity, set 2633 * the t_bound_cpu field to reflect the binding. 2634 */ 2635 if (tp->t_affinitycnt == 0) { 2636 if (binding == PBIND_NONE) { 2637 /* 2638 * We may need to adjust disp_max_unbound_pri 2639 * since we're becoming unbound. 2640 */ 2641 disp_adjust_unbound_pri(tp); 2642 2643 tp->t_bound_cpu = NULL; /* set new binding */ 2644 2645 /* 2646 * Move thread to lgroup with strongest affinity 2647 * after unbinding 2648 */ 2649 if (tp->t_lgrp_affinity) 2650 lgrp_move_thread(tp, 2651 lgrp_choose(tp, tp->t_cpupart), 1); 2652 2653 if (tp->t_state == TS_ONPROC && 2654 tp->t_cpu->cpu_part != tp->t_cpupart) 2655 cpu_surrender(tp); 2656 } else { 2657 lpl_t *lpl; 2658 2659 tp->t_bound_cpu = cp; 2660 ASSERT(cp->cpu_lpl != NULL); 2661 2662 /* 2663 * Set home to lgroup with most affinity containing CPU 2664 * that thread is being bound or minimum bounding 2665 * lgroup if no affinities set 2666 */ 2667 if (tp->t_lgrp_affinity) 2668 lpl = lgrp_affinity_best(tp, tp->t_cpupart, 2669 LGRP_NONE, B_FALSE); 2670 else 2671 lpl = cp->cpu_lpl; 2672 2673 if (tp->t_lpl != lpl) { 2674 /* can't grab cpu_lock */ 2675 lgrp_move_thread(tp, lpl, 1); 2676 } 2677 2678 /* 2679 * Make the thread switch to the bound CPU. 2680 * If the thread is runnable, we need to 2681 * requeue it even if t_cpu is already set 2682 * to the right CPU, since it may be on a 2683 * kpreempt queue and need to move to a local 2684 * queue. We could check t_disp_queue to 2685 * avoid unnecessary overhead if it's already 2686 * on the right queue, but since this isn't 2687 * a performance-critical operation it doesn't 2688 * seem worth the extra code and complexity. 2689 * 2690 * If the thread is weakbound to the cpu then it will 2691 * resist the new binding request until the weak 2692 * binding drops. The cpu_surrender or requeueing 2693 * below could be skipped in such cases (since it 2694 * will have no effect), but that would require 2695 * thread_allowmigrate to acquire thread_lock so 2696 * we'll take the very occasional hit here instead. 
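 *
 * (Weak bindings are established by thread_nomigrate() and released by
 * thread_allowmigrate(); the binding set here simply takes full effect
 * once the thread's weakbound count drops to zero.)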
2697 */ 2698 if (tp->t_state == TS_ONPROC) { 2699 cpu_surrender(tp); 2700 } else if (tp->t_state == TS_RUN) { 2701 cpu_t *ocp = tp->t_cpu; 2702 2703 (void) dispdeq(tp); 2704 setbackdq(tp); 2705 /* 2706 * Either on the bound CPU's disp queue now, 2707 * or swapped out or on the swap queue. 2708 */ 2709 ASSERT(tp->t_disp_queue == cp->cpu_disp || 2710 tp->t_weakbound_cpu == ocp || 2711 (tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) 2712 != TS_LOAD); 2713 } 2714 } 2715 } 2716 2717 /* 2718 * Our binding has changed; set TP_CHANGEBIND. 2719 */ 2720 tp->t_proc_flag |= TP_CHANGEBIND; 2721 aston(tp); 2722 2723 thread_unlock(tp); 2724 2725 return (0); 2726 } 2727 2728 #if CPUSET_WORDS > 1 2729 2730 /* 2731 * Functions for implementing cpuset operations when a cpuset is more 2732 * than one word. On platforms where a cpuset is a single word these 2733 * are implemented as macros in cpuvar.h. 2734 */ 2735 2736 void 2737 cpuset_all(cpuset_t *s) 2738 { 2739 int i; 2740 2741 for (i = 0; i < CPUSET_WORDS; i++) 2742 s->cpub[i] = ~0UL; 2743 } 2744 2745 void 2746 cpuset_all_but(cpuset_t *s, uint_t cpu) 2747 { 2748 cpuset_all(s); 2749 CPUSET_DEL(*s, cpu); 2750 } 2751 2752 void 2753 cpuset_only(cpuset_t *s, uint_t cpu) 2754 { 2755 CPUSET_ZERO(*s); 2756 CPUSET_ADD(*s, cpu); 2757 } 2758 2759 int 2760 cpuset_isnull(cpuset_t *s) 2761 { 2762 int i; 2763 2764 for (i = 0; i < CPUSET_WORDS; i++) 2765 if (s->cpub[i] != 0) 2766 return (0); 2767 return (1); 2768 } 2769 2770 int 2771 cpuset_cmp(cpuset_t *s1, cpuset_t *s2) 2772 { 2773 int i; 2774 2775 for (i = 0; i < CPUSET_WORDS; i++) 2776 if (s1->cpub[i] != s2->cpub[i]) 2777 return (0); 2778 return (1); 2779 } 2780 2781 uint_t 2782 cpuset_find(cpuset_t *s) 2783 { 2784 2785 uint_t i; 2786 uint_t cpu = (uint_t)-1; 2787 2788 /* 2789 * Find a cpu in the cpuset 2790 */ 2791 for (i = 0; i < CPUSET_WORDS; i++) { 2792 cpu = (uint_t)(lowbit(s->cpub[i]) - 1); 2793 if (cpu != (uint_t)-1) { 2794 cpu += i * BT_NBIPUL; 2795 break; 2796 } 2797 } 2798 return (cpu); 2799 } 2800 2801 void 2802 cpuset_bounds(cpuset_t *s, uint_t *smallestid, uint_t *largestid) 2803 { 2804 int i, j; 2805 uint_t bit; 2806 2807 /* 2808 * First, find the smallest cpu id in the set. 2809 */ 2810 for (i = 0; i < CPUSET_WORDS; i++) { 2811 if (s->cpub[i] != 0) { 2812 bit = (uint_t)(lowbit(s->cpub[i]) - 1); 2813 ASSERT(bit != (uint_t)-1); 2814 *smallestid = bit + (i * BT_NBIPUL); 2815 2816 /* 2817 * Now find the largest cpu id in 2818 * the set and return immediately. 2819 * Done in an inner loop to avoid 2820 * having to break out of the first 2821 * loop. 2822 */ 2823 for (j = CPUSET_WORDS - 1; j >= i; j--) { 2824 if (s->cpub[j] != 0) { 2825 bit = (uint_t)(highbit(s->cpub[j]) - 1); 2826 ASSERT(bit != (uint_t)-1); 2827 *largestid = bit + (j * BT_NBIPUL); 2828 ASSERT(*largestid >= *smallestid); 2829 return; 2830 } 2831 } 2832 2833 /* 2834 * If this code is reached, a 2835 * smallestid was found, but not a 2836 * largestid. The cpuset must have 2837 * been changed during the course 2838 * of this function call. 2839 */ 2840 ASSERT(0); 2841 } 2842 } 2843 *smallestid = *largestid = CPUSET_NOTINSET; 2844 } 2845 2846 #endif /* CPUSET_WORDS */ 2847 2848 /* 2849 * Unbind threads bound to specified CPU. 2850 * 2851 * If `unbind_all_threads' is true, unbind all user threads bound to a given 2852 * CPU. Otherwise unbind all soft-bound user threads. 
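 *
 * The userland interface to the bindings cleared here is processor_bind(2).
 * For example, a process hard-binds all of its LWPs to processor 2 with
 * (a sketch, assuming processor 2 exists and is online):
 *
 *	processorid_t obind;
 *
 *	(void) processor_bind(P_PID, P_MYID, 2, &obind);
 *
 * and removes the binding again by passing PBIND_NONE as the processor id.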
2853 */ 2854 int 2855 cpu_unbind(processorid_t cpu, boolean_t unbind_all_threads) 2856 { 2857 processorid_t obind; 2858 kthread_t *tp; 2859 int ret = 0; 2860 proc_t *pp; 2861 int err, berr = 0; 2862 2863 ASSERT(MUTEX_HELD(&cpu_lock)); 2864 2865 mutex_enter(&pidlock); 2866 for (pp = practive; pp != NULL; pp = pp->p_next) { 2867 mutex_enter(&pp->p_lock); 2868 tp = pp->p_tlist; 2869 /* 2870 * Skip zombies, kernel processes, and processes in 2871 * other zones, if called from a non-global zone. 2872 */ 2873 if (tp == NULL || (pp->p_flag & SSYS) || 2874 !HASZONEACCESS(curproc, pp->p_zone->zone_id)) { 2875 mutex_exit(&pp->p_lock); 2876 continue; 2877 } 2878 do { 2879 if (tp->t_bind_cpu != cpu) 2880 continue; 2881 /* 2882 * Skip threads with hard binding when 2883 * `unbind_all_threads' is not specified. 2884 */ 2885 if (!unbind_all_threads && TB_CPU_IS_HARD(tp)) 2886 continue; 2887 err = cpu_bind_thread(tp, PBIND_NONE, &obind, &berr); 2888 if (ret == 0) 2889 ret = err; 2890 } while ((tp = tp->t_forw) != pp->p_tlist); 2891 mutex_exit(&pp->p_lock); 2892 } 2893 mutex_exit(&pidlock); 2894 if (ret == 0) 2895 ret = berr; 2896 return (ret); 2897 } 2898 2899 2900 /* 2901 * Destroy all remaining bound threads on a cpu. 2902 */ 2903 void 2904 cpu_destroy_bound_threads(cpu_t *cp) 2905 { 2906 extern id_t syscid; 2907 register kthread_id_t t, tlist, tnext; 2908 2909 /* 2910 * Destroy all remaining bound threads on the cpu. This 2911 * should include both the interrupt threads and the idle thread. 2912 * This requires some care, since we need to traverse the 2913 * thread list with the pidlock mutex locked, but thread_free 2914 * also locks the pidlock mutex. So, we collect the threads 2915 * we're going to reap in a list headed by "tlist", then we 2916 * unlock the pidlock mutex and traverse the tlist list, 2917 * doing thread_free's on the thread's. Simple, n'est pas? 2918 * Also, this depends on thread_free not mucking with the 2919 * t_next and t_prev links of the thread. 2920 */ 2921 2922 if ((t = curthread) != NULL) { 2923 2924 tlist = NULL; 2925 mutex_enter(&pidlock); 2926 do { 2927 tnext = t->t_next; 2928 if (t->t_bound_cpu == cp) { 2929 2930 /* 2931 * We've found a bound thread, carefully unlink 2932 * it out of the thread list, and add it to 2933 * our "tlist". We "know" we don't have to 2934 * worry about unlinking curthread (the thread 2935 * that is executing this code). 2936 */ 2937 t->t_next->t_prev = t->t_prev; 2938 t->t_prev->t_next = t->t_next; 2939 t->t_next = tlist; 2940 tlist = t; 2941 ASSERT(t->t_cid == syscid); 2942 /* wake up anyone blocked in thread_join */ 2943 cv_broadcast(&t->t_joincv); 2944 /* 2945 * t_lwp set by interrupt threads and not 2946 * cleared. 2947 */ 2948 t->t_lwp = NULL; 2949 /* 2950 * Pause and idle threads always have 2951 * t_state set to TS_ONPROC. 2952 */ 2953 t->t_state = TS_FREE; 2954 t->t_prev = NULL; /* Just in case */ 2955 } 2956 2957 } while ((t = tnext) != curthread); 2958 2959 mutex_exit(&pidlock); 2960 2961 mutex_sync(); 2962 for (t = tlist; t != NULL; t = tnext) { 2963 tnext = t->t_next; 2964 thread_free(t); 2965 } 2966 } 2967 } 2968 2969 /* 2970 * Update the cpu_supp_freqs of this cpu. This information is returned 2971 * as part of cpu_info kstats. If the cpu_info_kstat exists already, then 2972 * maintain the kstat data size. 
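 *
 * Callers (typically the CPU power management code) pass the supported
 * speeds as a single string of Hz values, e.g. something along the lines
 * of (a sketch; the exact separator is whatever the caller uses when it
 * builds the string, colon-separated values being what the
 * "supported_frequencies_Hz" statistic conventionally shows):
 *
 *	cpu_set_supp_freqs(cp, "1200000000:2200000000:2800000000");
 *
 * Passing NULL means only the current clock rate is supported.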
2973 */
2974 void
2975 cpu_set_supp_freqs(cpu_t *cp, const char *freqs)
2976 {
2977 char clkstr[sizeof ("18446744073709551615") + 1]; /* ui64 MAX */
2978 const char *lfreqs = clkstr;
2979 boolean_t kstat_exists = B_FALSE;
2980 kstat_t *ksp;
2981 size_t len;
2982
2983 /*
2984 * A NULL pointer means we only support one speed.
2985 */
2986 if (freqs == NULL)
2987 (void) snprintf(clkstr, sizeof (clkstr), "%"PRIu64,
2988 cp->cpu_curr_clock);
2989 else
2990 lfreqs = freqs;
2991
2992 /*
2993 * Make sure the frequency doesn't change while a snapshot is
2994 * going on. Of course, we only need to worry about this if
2995 * the kstat exists.
2996 */
2997 if ((ksp = cp->cpu_info_kstat) != NULL) {
2998 mutex_enter(ksp->ks_lock);
2999 kstat_exists = B_TRUE;
3000 }
3001
3002 /*
3003 * Free any previously allocated string and, if the kstat
3004 * already exists, update its data size.
3005 */
3006 if (cp->cpu_supp_freqs != NULL) {
3007 len = strlen(cp->cpu_supp_freqs) + 1;
3008 kmem_free(cp->cpu_supp_freqs, len);
3009 if (kstat_exists)
3010 ksp->ks_data_size -= len;
3011 }
3012
3013 /*
3014 * Allocate the new string and set the pointer.
3015 */
3016 len = strlen(lfreqs) + 1;
3017 cp->cpu_supp_freqs = kmem_alloc(len, KM_SLEEP);
3018 (void) strcpy(cp->cpu_supp_freqs, lfreqs);
3019
3020 /*
3021 * If the kstat already exists then update the data size and
3022 * release the lock.
3023 */
3024 if (kstat_exists) {
3025 ksp->ks_data_size += len;
3026 mutex_exit(ksp->ks_lock);
3027 }
3028 }
3029
3030 /*
3031 * Indicate the current CPU's clock frequency (in Hz).
3032 * The calling context must be such that CPU references are safe.
3033 */
3034 void
3035 cpu_set_curr_clock(uint64_t new_clk)
3036 {
3037 uint64_t old_clk;
3038
3039 old_clk = CPU->cpu_curr_clock;
3040 CPU->cpu_curr_clock = new_clk;
3041
3042 /*
3043 * The cpu-change-speed DTrace probe exports the frequency in Hz.
3044 */
3045 DTRACE_PROBE3(cpu__change__speed, processorid_t, CPU->cpu_id,
3046 uint64_t, old_clk, uint64_t, new_clk);
3047 }
3048
3049 /*
3050 * processor_info(2) and p_online(2) status support functions.
3051 * The constants returned by cpu_get_state() and cpu_get_state_str() are
3052 * for use in communicating processor state information to userland. Kernel
3053 * subsystems should only be using the cpu_flags value directly. Subsystems
3054 * modifying cpu_flags should record the state change via a call to
3055 * cpu_set_state().
3056 */
3057
3058 /*
3059 * Update the pi_state of this CPU. This function provides the CPU status for
3060 * the information returned by processor_info(2).
3061 */
3062 void
3063 cpu_set_state(cpu_t *cpu)
3064 {
3065 ASSERT(MUTEX_HELD(&cpu_lock));
3066 cpu->cpu_type_info.pi_state = cpu_get_state(cpu);
3067 cpu->cpu_state_begin = gethrestime_sec();
3068 pool_cpu_mod = gethrtime();
3069 }
3070
3071 /*
3072 * Return offline/online/other status for the indicated CPU. Use only for
3073 * communication with user applications; cpu_flags provides the in-kernel
3074 * interface.
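 *
 * The userland consumer of these P_* values is processor_info(2); a
 * minimal sketch:
 *
 *	#include <sys/processor.h>
 *	#include <stdio.h>
 *
 *	processor_info_t pi;
 *
 *	if (processor_info(0, &pi) == 0 && pi.pi_state == P_ONLINE)
 *		(void) printf("cpu 0 on-line at %d MHz\n", pi.pi_clock);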
3075 */ 3076 int 3077 cpu_get_state(cpu_t *cpu) 3078 { 3079 ASSERT(MUTEX_HELD(&cpu_lock)); 3080 if (cpu->cpu_flags & CPU_POWEROFF) 3081 return (P_POWEROFF); 3082 else if (cpu->cpu_flags & CPU_FAULTED) 3083 return (P_FAULTED); 3084 else if (cpu->cpu_flags & CPU_SPARE) 3085 return (P_SPARE); 3086 else if ((cpu->cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY) 3087 return (P_OFFLINE); 3088 else if (cpu->cpu_flags & CPU_ENABLE) 3089 return (P_ONLINE); 3090 else 3091 return (P_NOINTR); 3092 } 3093 3094 /* 3095 * Return processor_info(2) state as a string. 3096 */ 3097 const char * 3098 cpu_get_state_str(cpu_t *cpu) 3099 { 3100 const char *string; 3101 3102 switch (cpu_get_state(cpu)) { 3103 case P_ONLINE: 3104 string = PS_ONLINE; 3105 break; 3106 case P_POWEROFF: 3107 string = PS_POWEROFF; 3108 break; 3109 case P_NOINTR: 3110 string = PS_NOINTR; 3111 break; 3112 case P_SPARE: 3113 string = PS_SPARE; 3114 break; 3115 case P_FAULTED: 3116 string = PS_FAULTED; 3117 break; 3118 case P_OFFLINE: 3119 string = PS_OFFLINE; 3120 break; 3121 default: 3122 string = "unknown"; 3123 break; 3124 } 3125 return (string); 3126 } 3127 3128 /* 3129 * Export this CPU's statistics (cpu_stat_t and cpu_stats_t) as raw and named 3130 * kstats, respectively. This is done when a CPU is initialized or placed 3131 * online via p_online(2). 3132 */ 3133 static void 3134 cpu_stats_kstat_create(cpu_t *cp) 3135 { 3136 int instance = cp->cpu_id; 3137 char *module = "cpu"; 3138 char *class = "misc"; 3139 kstat_t *ksp; 3140 zoneid_t zoneid; 3141 3142 ASSERT(MUTEX_HELD(&cpu_lock)); 3143 3144 if (pool_pset_enabled()) 3145 zoneid = GLOBAL_ZONEID; 3146 else 3147 zoneid = ALL_ZONES; 3148 /* 3149 * Create named kstats 3150 */ 3151 #define CPU_STATS_KS_CREATE(name, tsize, update_func) \ 3152 ksp = kstat_create_zone(module, instance, (name), class, \ 3153 KSTAT_TYPE_NAMED, (tsize) / sizeof (kstat_named_t), 0, \ 3154 zoneid); \ 3155 if (ksp != NULL) { \ 3156 ksp->ks_private = cp; \ 3157 ksp->ks_update = (update_func); \ 3158 kstat_install(ksp); \ 3159 } else \ 3160 cmn_err(CE_WARN, "cpu: unable to create %s:%d:%s kstat", \ 3161 module, instance, (name)); 3162 3163 CPU_STATS_KS_CREATE("sys", sizeof (cpu_sys_stats_ks_data_template), 3164 cpu_sys_stats_ks_update); 3165 CPU_STATS_KS_CREATE("vm", sizeof (cpu_vm_stats_ks_data_template), 3166 cpu_vm_stats_ks_update); 3167 3168 /* 3169 * Export the familiar cpu_stat_t KSTAT_TYPE_RAW kstat. 3170 */ 3171 ksp = kstat_create_zone("cpu_stat", cp->cpu_id, NULL, 3172 "misc", KSTAT_TYPE_RAW, sizeof (cpu_stat_t), 0, zoneid); 3173 if (ksp != NULL) { 3174 ksp->ks_update = cpu_stat_ks_update; 3175 ksp->ks_private = cp; 3176 kstat_install(ksp); 3177 } 3178 } 3179 3180 static void 3181 cpu_stats_kstat_destroy(cpu_t *cp) 3182 { 3183 char ks_name[KSTAT_STRLEN]; 3184 3185 (void) sprintf(ks_name, "cpu_stat%d", cp->cpu_id); 3186 kstat_delete_byname("cpu_stat", cp->cpu_id, ks_name); 3187 3188 kstat_delete_byname("cpu", cp->cpu_id, "sys"); 3189 kstat_delete_byname("cpu", cp->cpu_id, "vm"); 3190 } 3191 3192 static int 3193 cpu_sys_stats_ks_update(kstat_t *ksp, int rw) 3194 { 3195 cpu_t *cp = (cpu_t *)ksp->ks_private; 3196 struct cpu_sys_stats_ks_data *csskd; 3197 cpu_sys_stats_t *css; 3198 hrtime_t msnsecs[NCMSTATES]; 3199 int i; 3200 3201 if (rw == KSTAT_WRITE) 3202 return (EACCES); 3203 3204 csskd = ksp->ks_data; 3205 css = &cp->cpu_stats.sys; 3206 3207 /* 3208 * Read CPU mstate, but compare with the last values we 3209 * received to make sure that the returned kstats never 3210 * decrease. 
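 *
 * (For example, if a previous snapshot reported 10 seconds of idle time
 * and a racing microstate update briefly makes a fresh read come back
 * lower, the previously reported value is returned instead, so consumers
 * such as mpstat(1M) never see a counter go backwards.)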
3211 */ 3212 3213 get_cpu_mstate(cp, msnsecs); 3214 if (csskd->cpu_nsec_idle.value.ui64 > msnsecs[CMS_IDLE]) 3215 msnsecs[CMS_IDLE] = csskd->cpu_nsec_idle.value.ui64; 3216 if (csskd->cpu_nsec_user.value.ui64 > msnsecs[CMS_USER]) 3217 msnsecs[CMS_USER] = csskd->cpu_nsec_user.value.ui64; 3218 if (csskd->cpu_nsec_kernel.value.ui64 > msnsecs[CMS_SYSTEM]) 3219 msnsecs[CMS_SYSTEM] = csskd->cpu_nsec_kernel.value.ui64; 3220 3221 bcopy(&cpu_sys_stats_ks_data_template, ksp->ks_data, 3222 sizeof (cpu_sys_stats_ks_data_template)); 3223 3224 csskd->cpu_ticks_wait.value.ui64 = 0; 3225 csskd->wait_ticks_io.value.ui64 = 0; 3226 3227 csskd->cpu_nsec_idle.value.ui64 = msnsecs[CMS_IDLE]; 3228 csskd->cpu_nsec_user.value.ui64 = msnsecs[CMS_USER]; 3229 csskd->cpu_nsec_kernel.value.ui64 = msnsecs[CMS_SYSTEM]; 3230 csskd->cpu_ticks_idle.value.ui64 = 3231 NSEC_TO_TICK(csskd->cpu_nsec_idle.value.ui64); 3232 csskd->cpu_ticks_user.value.ui64 = 3233 NSEC_TO_TICK(csskd->cpu_nsec_user.value.ui64); 3234 csskd->cpu_ticks_kernel.value.ui64 = 3235 NSEC_TO_TICK(csskd->cpu_nsec_kernel.value.ui64); 3236 csskd->cpu_nsec_dtrace.value.ui64 = cp->cpu_dtrace_nsec; 3237 csskd->dtrace_probes.value.ui64 = cp->cpu_dtrace_probes; 3238 csskd->cpu_nsec_intr.value.ui64 = cp->cpu_intrlast; 3239 csskd->cpu_load_intr.value.ui64 = cp->cpu_intrload; 3240 csskd->bread.value.ui64 = css->bread; 3241 csskd->bwrite.value.ui64 = css->bwrite; 3242 csskd->lread.value.ui64 = css->lread; 3243 csskd->lwrite.value.ui64 = css->lwrite; 3244 csskd->phread.value.ui64 = css->phread; 3245 csskd->phwrite.value.ui64 = css->phwrite; 3246 csskd->pswitch.value.ui64 = css->pswitch; 3247 csskd->trap.value.ui64 = css->trap; 3248 csskd->intr.value.ui64 = 0; 3249 for (i = 0; i < PIL_MAX; i++) 3250 csskd->intr.value.ui64 += css->intr[i]; 3251 csskd->syscall.value.ui64 = css->syscall; 3252 csskd->sysread.value.ui64 = css->sysread; 3253 csskd->syswrite.value.ui64 = css->syswrite; 3254 csskd->sysfork.value.ui64 = css->sysfork; 3255 csskd->sysvfork.value.ui64 = css->sysvfork; 3256 csskd->sysexec.value.ui64 = css->sysexec; 3257 csskd->readch.value.ui64 = css->readch; 3258 csskd->writech.value.ui64 = css->writech; 3259 csskd->rcvint.value.ui64 = css->rcvint; 3260 csskd->xmtint.value.ui64 = css->xmtint; 3261 csskd->mdmint.value.ui64 = css->mdmint; 3262 csskd->rawch.value.ui64 = css->rawch; 3263 csskd->canch.value.ui64 = css->canch; 3264 csskd->outch.value.ui64 = css->outch; 3265 csskd->msg.value.ui64 = css->msg; 3266 csskd->sema.value.ui64 = css->sema; 3267 csskd->namei.value.ui64 = css->namei; 3268 csskd->ufsiget.value.ui64 = css->ufsiget; 3269 csskd->ufsdirblk.value.ui64 = css->ufsdirblk; 3270 csskd->ufsipage.value.ui64 = css->ufsipage; 3271 csskd->ufsinopage.value.ui64 = css->ufsinopage; 3272 csskd->procovf.value.ui64 = css->procovf; 3273 csskd->intrthread.value.ui64 = 0; 3274 for (i = 0; i < LOCK_LEVEL - 1; i++) 3275 csskd->intrthread.value.ui64 += css->intr[i]; 3276 csskd->intrblk.value.ui64 = css->intrblk; 3277 csskd->intrunpin.value.ui64 = css->intrunpin; 3278 csskd->idlethread.value.ui64 = css->idlethread; 3279 csskd->inv_swtch.value.ui64 = css->inv_swtch; 3280 csskd->nthreads.value.ui64 = css->nthreads; 3281 csskd->cpumigrate.value.ui64 = css->cpumigrate; 3282 csskd->xcalls.value.ui64 = css->xcalls; 3283 csskd->mutex_adenters.value.ui64 = css->mutex_adenters; 3284 csskd->rw_rdfails.value.ui64 = css->rw_rdfails; 3285 csskd->rw_wrfails.value.ui64 = css->rw_wrfails; 3286 csskd->modload.value.ui64 = css->modload; 3287 csskd->modunload.value.ui64 = css->modunload; 3288 
csskd->bawrite.value.ui64 = css->bawrite; 3289 csskd->iowait.value.ui64 = css->iowait; 3290 3291 return (0); 3292 } 3293 3294 static int 3295 cpu_vm_stats_ks_update(kstat_t *ksp, int rw) 3296 { 3297 cpu_t *cp = (cpu_t *)ksp->ks_private; 3298 struct cpu_vm_stats_ks_data *cvskd; 3299 cpu_vm_stats_t *cvs; 3300 3301 if (rw == KSTAT_WRITE) 3302 return (EACCES); 3303 3304 cvs = &cp->cpu_stats.vm; 3305 cvskd = ksp->ks_data; 3306 3307 bcopy(&cpu_vm_stats_ks_data_template, ksp->ks_data, 3308 sizeof (cpu_vm_stats_ks_data_template)); 3309 cvskd->pgrec.value.ui64 = cvs->pgrec; 3310 cvskd->pgfrec.value.ui64 = cvs->pgfrec; 3311 cvskd->pgin.value.ui64 = cvs->pgin; 3312 cvskd->pgpgin.value.ui64 = cvs->pgpgin; 3313 cvskd->pgout.value.ui64 = cvs->pgout; 3314 cvskd->pgpgout.value.ui64 = cvs->pgpgout; 3315 cvskd->swapin.value.ui64 = cvs->swapin; 3316 cvskd->pgswapin.value.ui64 = cvs->pgswapin; 3317 cvskd->swapout.value.ui64 = cvs->swapout; 3318 cvskd->pgswapout.value.ui64 = cvs->pgswapout; 3319 cvskd->zfod.value.ui64 = cvs->zfod; 3320 cvskd->dfree.value.ui64 = cvs->dfree; 3321 cvskd->scan.value.ui64 = cvs->scan; 3322 cvskd->rev.value.ui64 = cvs->rev; 3323 cvskd->hat_fault.value.ui64 = cvs->hat_fault; 3324 cvskd->as_fault.value.ui64 = cvs->as_fault; 3325 cvskd->maj_fault.value.ui64 = cvs->maj_fault; 3326 cvskd->cow_fault.value.ui64 = cvs->cow_fault; 3327 cvskd->prot_fault.value.ui64 = cvs->prot_fault; 3328 cvskd->softlock.value.ui64 = cvs->softlock; 3329 cvskd->kernel_asflt.value.ui64 = cvs->kernel_asflt; 3330 cvskd->pgrrun.value.ui64 = cvs->pgrrun; 3331 cvskd->execpgin.value.ui64 = cvs->execpgin; 3332 cvskd->execpgout.value.ui64 = cvs->execpgout; 3333 cvskd->execfree.value.ui64 = cvs->execfree; 3334 cvskd->anonpgin.value.ui64 = cvs->anonpgin; 3335 cvskd->anonpgout.value.ui64 = cvs->anonpgout; 3336 cvskd->anonfree.value.ui64 = cvs->anonfree; 3337 cvskd->fspgin.value.ui64 = cvs->fspgin; 3338 cvskd->fspgout.value.ui64 = cvs->fspgout; 3339 cvskd->fsfree.value.ui64 = cvs->fsfree; 3340 3341 return (0); 3342 } 3343 3344 static int 3345 cpu_stat_ks_update(kstat_t *ksp, int rw) 3346 { 3347 cpu_stat_t *cso; 3348 cpu_t *cp; 3349 int i; 3350 hrtime_t msnsecs[NCMSTATES]; 3351 3352 cso = (cpu_stat_t *)ksp->ks_data; 3353 cp = (cpu_t *)ksp->ks_private; 3354 3355 if (rw == KSTAT_WRITE) 3356 return (EACCES); 3357 3358 /* 3359 * Read CPU mstate, but compare with the last values we 3360 * received to make sure that the returned kstats never 3361 * decrease. 
3362 */ 3363 3364 get_cpu_mstate(cp, msnsecs); 3365 msnsecs[CMS_IDLE] = NSEC_TO_TICK(msnsecs[CMS_IDLE]); 3366 msnsecs[CMS_USER] = NSEC_TO_TICK(msnsecs[CMS_USER]); 3367 msnsecs[CMS_SYSTEM] = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]); 3368 if (cso->cpu_sysinfo.cpu[CPU_IDLE] < msnsecs[CMS_IDLE]) 3369 cso->cpu_sysinfo.cpu[CPU_IDLE] = msnsecs[CMS_IDLE]; 3370 if (cso->cpu_sysinfo.cpu[CPU_USER] < msnsecs[CMS_USER]) 3371 cso->cpu_sysinfo.cpu[CPU_USER] = msnsecs[CMS_USER]; 3372 if (cso->cpu_sysinfo.cpu[CPU_KERNEL] < msnsecs[CMS_SYSTEM]) 3373 cso->cpu_sysinfo.cpu[CPU_KERNEL] = msnsecs[CMS_SYSTEM]; 3374 cso->cpu_sysinfo.cpu[CPU_WAIT] = 0; 3375 cso->cpu_sysinfo.wait[W_IO] = 0; 3376 cso->cpu_sysinfo.wait[W_SWAP] = 0; 3377 cso->cpu_sysinfo.wait[W_PIO] = 0; 3378 cso->cpu_sysinfo.bread = CPU_STATS(cp, sys.bread); 3379 cso->cpu_sysinfo.bwrite = CPU_STATS(cp, sys.bwrite); 3380 cso->cpu_sysinfo.lread = CPU_STATS(cp, sys.lread); 3381 cso->cpu_sysinfo.lwrite = CPU_STATS(cp, sys.lwrite); 3382 cso->cpu_sysinfo.phread = CPU_STATS(cp, sys.phread); 3383 cso->cpu_sysinfo.phwrite = CPU_STATS(cp, sys.phwrite); 3384 cso->cpu_sysinfo.pswitch = CPU_STATS(cp, sys.pswitch); 3385 cso->cpu_sysinfo.trap = CPU_STATS(cp, sys.trap); 3386 cso->cpu_sysinfo.intr = 0; 3387 for (i = 0; i < PIL_MAX; i++) 3388 cso->cpu_sysinfo.intr += CPU_STATS(cp, sys.intr[i]); 3389 cso->cpu_sysinfo.syscall = CPU_STATS(cp, sys.syscall); 3390 cso->cpu_sysinfo.sysread = CPU_STATS(cp, sys.sysread); 3391 cso->cpu_sysinfo.syswrite = CPU_STATS(cp, sys.syswrite); 3392 cso->cpu_sysinfo.sysfork = CPU_STATS(cp, sys.sysfork); 3393 cso->cpu_sysinfo.sysvfork = CPU_STATS(cp, sys.sysvfork); 3394 cso->cpu_sysinfo.sysexec = CPU_STATS(cp, sys.sysexec); 3395 cso->cpu_sysinfo.readch = CPU_STATS(cp, sys.readch); 3396 cso->cpu_sysinfo.writech = CPU_STATS(cp, sys.writech); 3397 cso->cpu_sysinfo.rcvint = CPU_STATS(cp, sys.rcvint); 3398 cso->cpu_sysinfo.xmtint = CPU_STATS(cp, sys.xmtint); 3399 cso->cpu_sysinfo.mdmint = CPU_STATS(cp, sys.mdmint); 3400 cso->cpu_sysinfo.rawch = CPU_STATS(cp, sys.rawch); 3401 cso->cpu_sysinfo.canch = CPU_STATS(cp, sys.canch); 3402 cso->cpu_sysinfo.outch = CPU_STATS(cp, sys.outch); 3403 cso->cpu_sysinfo.msg = CPU_STATS(cp, sys.msg); 3404 cso->cpu_sysinfo.sema = CPU_STATS(cp, sys.sema); 3405 cso->cpu_sysinfo.namei = CPU_STATS(cp, sys.namei); 3406 cso->cpu_sysinfo.ufsiget = CPU_STATS(cp, sys.ufsiget); 3407 cso->cpu_sysinfo.ufsdirblk = CPU_STATS(cp, sys.ufsdirblk); 3408 cso->cpu_sysinfo.ufsipage = CPU_STATS(cp, sys.ufsipage); 3409 cso->cpu_sysinfo.ufsinopage = CPU_STATS(cp, sys.ufsinopage); 3410 cso->cpu_sysinfo.inodeovf = 0; 3411 cso->cpu_sysinfo.fileovf = 0; 3412 cso->cpu_sysinfo.procovf = CPU_STATS(cp, sys.procovf); 3413 cso->cpu_sysinfo.intrthread = 0; 3414 for (i = 0; i < LOCK_LEVEL - 1; i++) 3415 cso->cpu_sysinfo.intrthread += CPU_STATS(cp, sys.intr[i]); 3416 cso->cpu_sysinfo.intrblk = CPU_STATS(cp, sys.intrblk); 3417 cso->cpu_sysinfo.idlethread = CPU_STATS(cp, sys.idlethread); 3418 cso->cpu_sysinfo.inv_swtch = CPU_STATS(cp, sys.inv_swtch); 3419 cso->cpu_sysinfo.nthreads = CPU_STATS(cp, sys.nthreads); 3420 cso->cpu_sysinfo.cpumigrate = CPU_STATS(cp, sys.cpumigrate); 3421 cso->cpu_sysinfo.xcalls = CPU_STATS(cp, sys.xcalls); 3422 cso->cpu_sysinfo.mutex_adenters = CPU_STATS(cp, sys.mutex_adenters); 3423 cso->cpu_sysinfo.rw_rdfails = CPU_STATS(cp, sys.rw_rdfails); 3424 cso->cpu_sysinfo.rw_wrfails = CPU_STATS(cp, sys.rw_wrfails); 3425 cso->cpu_sysinfo.modload = CPU_STATS(cp, sys.modload); 3426 cso->cpu_sysinfo.modunload = CPU_STATS(cp, sys.modunload); 3427 
cso->cpu_sysinfo.bawrite = CPU_STATS(cp, sys.bawrite); 3428 cso->cpu_sysinfo.rw_enters = 0; 3429 cso->cpu_sysinfo.win_uo_cnt = 0; 3430 cso->cpu_sysinfo.win_uu_cnt = 0; 3431 cso->cpu_sysinfo.win_so_cnt = 0; 3432 cso->cpu_sysinfo.win_su_cnt = 0; 3433 cso->cpu_sysinfo.win_suo_cnt = 0; 3434 3435 cso->cpu_syswait.iowait = CPU_STATS(cp, sys.iowait); 3436 cso->cpu_syswait.swap = 0; 3437 cso->cpu_syswait.physio = 0; 3438 3439 cso->cpu_vminfo.pgrec = CPU_STATS(cp, vm.pgrec); 3440 cso->cpu_vminfo.pgfrec = CPU_STATS(cp, vm.pgfrec); 3441 cso->cpu_vminfo.pgin = CPU_STATS(cp, vm.pgin); 3442 cso->cpu_vminfo.pgpgin = CPU_STATS(cp, vm.pgpgin); 3443 cso->cpu_vminfo.pgout = CPU_STATS(cp, vm.pgout); 3444 cso->cpu_vminfo.pgpgout = CPU_STATS(cp, vm.pgpgout); 3445 cso->cpu_vminfo.swapin = CPU_STATS(cp, vm.swapin); 3446 cso->cpu_vminfo.pgswapin = CPU_STATS(cp, vm.pgswapin); 3447 cso->cpu_vminfo.swapout = CPU_STATS(cp, vm.swapout); 3448 cso->cpu_vminfo.pgswapout = CPU_STATS(cp, vm.pgswapout); 3449 cso->cpu_vminfo.zfod = CPU_STATS(cp, vm.zfod); 3450 cso->cpu_vminfo.dfree = CPU_STATS(cp, vm.dfree); 3451 cso->cpu_vminfo.scan = CPU_STATS(cp, vm.scan); 3452 cso->cpu_vminfo.rev = CPU_STATS(cp, vm.rev); 3453 cso->cpu_vminfo.hat_fault = CPU_STATS(cp, vm.hat_fault); 3454 cso->cpu_vminfo.as_fault = CPU_STATS(cp, vm.as_fault); 3455 cso->cpu_vminfo.maj_fault = CPU_STATS(cp, vm.maj_fault); 3456 cso->cpu_vminfo.cow_fault = CPU_STATS(cp, vm.cow_fault); 3457 cso->cpu_vminfo.prot_fault = CPU_STATS(cp, vm.prot_fault); 3458 cso->cpu_vminfo.softlock = CPU_STATS(cp, vm.softlock); 3459 cso->cpu_vminfo.kernel_asflt = CPU_STATS(cp, vm.kernel_asflt); 3460 cso->cpu_vminfo.pgrrun = CPU_STATS(cp, vm.pgrrun); 3461 cso->cpu_vminfo.execpgin = CPU_STATS(cp, vm.execpgin); 3462 cso->cpu_vminfo.execpgout = CPU_STATS(cp, vm.execpgout); 3463 cso->cpu_vminfo.execfree = CPU_STATS(cp, vm.execfree); 3464 cso->cpu_vminfo.anonpgin = CPU_STATS(cp, vm.anonpgin); 3465 cso->cpu_vminfo.anonpgout = CPU_STATS(cp, vm.anonpgout); 3466 cso->cpu_vminfo.anonfree = CPU_STATS(cp, vm.anonfree); 3467 cso->cpu_vminfo.fspgin = CPU_STATS(cp, vm.fspgin); 3468 cso->cpu_vminfo.fspgout = CPU_STATS(cp, vm.fspgout); 3469 cso->cpu_vminfo.fsfree = CPU_STATS(cp, vm.fsfree); 3470 3471 return (0); 3472 }
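/*
 * The counters surfaced by the ks_update routines above are maintained
 * elsewhere in the kernel with the CPU_STATS_* macros from <sys/cpuvar.h>.
 * A minimal sketch of the producer side (hypothetical subsystem code):
 *
 *	CPU_STATS_ENTER_K();
 *	CPU_STATS_ADDQ(CPU, vm, pgin, 1);
 *	CPU_STATS_ADDQ(CPU, vm, pgpgin, 1);
 *	CPU_STATS_EXIT_K();
 *
 * The updates are per-CPU and unlocked; the ks_update routines simply
 * report whatever values are current when a kstat snapshot is taken.
 */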