10924 Need mitigation of L1TF (CVE-2018-3646)
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Peter Tribble <peter.tribble@gmail.com>
--- old/usr/src/uts/common/os/cpu.c
+++ new/usr/src/uts/common/os/cpu.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2012 by Delphix. All rights reserved.
24 24 * Copyright 2019 Joyent, Inc.
25 25 */
26 26
27 27 /*
28 28 * Architecture-independent CPU control functions.
29 29 */
30 30
31 31 #include <sys/types.h>
32 32 #include <sys/param.h>
33 33 #include <sys/var.h>
34 34 #include <sys/thread.h>
35 35 #include <sys/cpuvar.h>
36 36 #include <sys/cpu_event.h>
37 37 #include <sys/kstat.h>
38 38 #include <sys/uadmin.h>
39 39 #include <sys/systm.h>
40 40 #include <sys/errno.h>
41 41 #include <sys/cmn_err.h>
42 42 #include <sys/procset.h>
43 43 #include <sys/processor.h>
44 44 #include <sys/debug.h>
45 45 #include <sys/cpupart.h>
46 46 #include <sys/lgrp.h>
47 47 #include <sys/pset.h>
48 48 #include <sys/pghw.h>
49 49 #include <sys/kmem.h>
50 50 #include <sys/kmem_impl.h> /* to set per-cpu kmem_cache offset */
51 51 #include <sys/atomic.h>
52 52 #include <sys/callb.h>
53 53 #include <sys/vtrace.h>
54 54 #include <sys/cyclic.h>
55 55 #include <sys/bitmap.h>
56 56 #include <sys/nvpair.h>
57 57 #include <sys/pool_pset.h>
58 58 #include <sys/msacct.h>
59 59 #include <sys/time.h>
60 60 #include <sys/archsystm.h>
61 61 #include <sys/sdt.h>
62 62 #if defined(__x86) || defined(__amd64)
63 63 #include <sys/x86_archext.h>
64 64 #endif
65 65 #include <sys/callo.h>
66 66
67 67 extern int mp_cpu_start(cpu_t *);
68 68 extern int mp_cpu_stop(cpu_t *);
69 69 extern int mp_cpu_poweron(cpu_t *);
70 70 extern int mp_cpu_poweroff(cpu_t *);
71 71 extern int mp_cpu_configure(int);
72 72 extern int mp_cpu_unconfigure(int);
73 73 extern void mp_cpu_faulted_enter(cpu_t *);
74 74 extern void mp_cpu_faulted_exit(cpu_t *);
75 75
76 76 extern int cmp_cpu_to_chip(processorid_t cpuid);
77 77 #ifdef __sparcv9
78 78 extern char *cpu_fru_fmri(cpu_t *cp);
79 79 #endif
80 80
81 81 static void cpu_add_active_internal(cpu_t *cp);
82 82 static void cpu_remove_active(cpu_t *cp);
83 83 static void cpu_info_kstat_create(cpu_t *cp);
84 84 static void cpu_info_kstat_destroy(cpu_t *cp);
85 85 static void cpu_stats_kstat_create(cpu_t *cp);
86 86 static void cpu_stats_kstat_destroy(cpu_t *cp);
87 87
88 88 static int cpu_sys_stats_ks_update(kstat_t *ksp, int rw);
89 89 static int cpu_vm_stats_ks_update(kstat_t *ksp, int rw);
90 90 static int cpu_stat_ks_update(kstat_t *ksp, int rw);
91 91 static int cpu_state_change_hooks(int, cpu_setup_t, cpu_setup_t);
92 92
93 93 /*
94 94 * cpu_lock protects ncpus, ncpus_online, cpu_flag, cpu_list, cpu_active,
95 95 * max_cpu_seqid_ever, and dispatch queue reallocations. The lock ordering with
96 96 * respect to related locks is:
97 97 *
98 98 * cpu_lock --> thread_free_lock ---> p_lock ---> thread_lock()
99 99 *
100 100 * Warning: Certain sections of code do not use the cpu_lock when
101 101 * traversing the cpu_list (e.g. mutex_vector_enter(), clock()). Since
102 102 * all cpus are paused during modifications to this list, a solution
 103 103  * to protect the list is to either disable kernel preemption while
104 104 * walking the list, *or* recheck the cpu_next pointer at each
105 105 * iteration in the loop. Note that in no cases can any cached
106 106 * copies of the cpu pointers be kept as they may become invalid.
107 107 */
108 108 kmutex_t cpu_lock;
109 109 cpu_t *cpu_list; /* list of all CPUs */
110 110 cpu_t *clock_cpu_list; /* used by clock to walk CPUs */
111 111 cpu_t *cpu_active; /* list of active CPUs */
112 112 cpuset_t cpu_active_set; /* cached set of active CPUs */
113 113 static cpuset_t cpu_available; /* set of available CPUs */
114 114 cpuset_t cpu_seqid_inuse; /* which cpu_seqids are in use */
115 115
116 116 cpu_t **cpu_seq; /* ptrs to CPUs, indexed by seq_id */
117 117
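
A minimal sketch of the lockless cpu_list walk described in the comment above; the visit_cpu() callback is hypothetical and not part of this change, the point is only the preemption-disabled pattern:

/*
 * Sketch: walk cpu_list without holding cpu_lock.  The list is only
 * modified while all CPUs are paused, so disabling kernel preemption
 * on this CPU is enough to keep it stable.  visit_cpu() is a
 * hypothetical callback; it must not block, and no cpu_t pointer may
 * be cached past kpreempt_enable().
 */
static void
walk_cpus_nolock(void (*visit_cpu)(cpu_t *))
{
	cpu_t *cp;

	kpreempt_disable();
	cp = cpu_list;
	do {
		visit_cpu(cp);
		cp = cp->cpu_next;	/* re-read the link each iteration */
	} while (cp != cpu_list);
	kpreempt_enable();
}
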
118 118 /*
119 119 * max_ncpus keeps the max cpus the system can have. Initially
120 120 * it's NCPU, but since most archs scan the devtree for cpus
121 121 * fairly early on during boot, the real max can be known before
122 122 * ncpus is set (useful for early NCPU based allocations).
123 123 */
124 124 int max_ncpus = NCPU;
125 125 /*
 126 126  * platforms that set max_ncpus to the maximum number of cpus that can be
127 127 * dynamically added will set boot_max_ncpus to the number of cpus found
128 128 * at device tree scan time during boot.
129 129 */
130 130 int boot_max_ncpus = -1;
131 131 int boot_ncpus = -1;
132 132 /*
133 133 * Maximum possible CPU id. This can never be >= NCPU since NCPU is
134 134 * used to size arrays that are indexed by CPU id.
135 135 */
136 136 processorid_t max_cpuid = NCPU - 1;
137 137
138 138 /*
 139 139  * Maximum cpu_seqid ever given. This number can only grow and never shrink. It
140 140 * can be used to optimize NCPU loops to avoid going through CPUs which were
141 141 * never on-line.
142 142 */
143 143 processorid_t max_cpu_seqid_ever = 0;
144 144
145 145 int ncpus = 1;
146 146 int ncpus_online = 1;
147 147
148 148 /*
149 149 * CPU that we're trying to offline. Protected by cpu_lock.
150 150 */
151 151 cpu_t *cpu_inmotion;
152 152
153 153 /*
 154 154  * Can be raised to suppress further weakbindings, which are instead
155 155 * satisfied by disabling preemption. Must be raised/lowered under cpu_lock,
156 156 * while individual thread weakbinding synchronization is done under thread
157 157 * lock.
158 158 */
159 159 int weakbindingbarrier;
160 160
161 161 /*
162 162 * Variables used in pause_cpus().
163 163 */
164 164 static volatile char safe_list[NCPU];
165 165
166 166 static struct _cpu_pause_info {
167 167 int cp_spl; /* spl saved in pause_cpus() */
168 168 volatile int cp_go; /* Go signal sent after all ready */
169 169 int cp_count; /* # of CPUs to pause */
170 170 ksema_t cp_sem; /* synch pause_cpus & cpu_pause */
171 171 kthread_id_t cp_paused;
172 172 void *(*cp_func)(void *);
173 173 } cpu_pause_info;
174 174
175 175 static kmutex_t pause_free_mutex;
176 176 static kcondvar_t pause_free_cv;
177 177
178 178
179 179 static struct cpu_sys_stats_ks_data {
180 180 kstat_named_t cpu_ticks_idle;
181 181 kstat_named_t cpu_ticks_user;
182 182 kstat_named_t cpu_ticks_kernel;
183 183 kstat_named_t cpu_ticks_wait;
184 184 kstat_named_t cpu_nsec_idle;
185 185 kstat_named_t cpu_nsec_user;
186 186 kstat_named_t cpu_nsec_kernel;
187 187 kstat_named_t cpu_nsec_dtrace;
188 188 kstat_named_t cpu_nsec_intr;
189 189 kstat_named_t cpu_load_intr;
190 190 kstat_named_t wait_ticks_io;
191 191 kstat_named_t dtrace_probes;
192 192 kstat_named_t bread;
193 193 kstat_named_t bwrite;
194 194 kstat_named_t lread;
195 195 kstat_named_t lwrite;
196 196 kstat_named_t phread;
197 197 kstat_named_t phwrite;
198 198 kstat_named_t pswitch;
199 199 kstat_named_t trap;
200 200 kstat_named_t intr;
201 201 kstat_named_t syscall;
202 202 kstat_named_t sysread;
203 203 kstat_named_t syswrite;
204 204 kstat_named_t sysfork;
205 205 kstat_named_t sysvfork;
206 206 kstat_named_t sysexec;
207 207 kstat_named_t readch;
208 208 kstat_named_t writech;
209 209 kstat_named_t rcvint;
210 210 kstat_named_t xmtint;
211 211 kstat_named_t mdmint;
212 212 kstat_named_t rawch;
213 213 kstat_named_t canch;
214 214 kstat_named_t outch;
215 215 kstat_named_t msg;
216 216 kstat_named_t sema;
217 217 kstat_named_t namei;
218 218 kstat_named_t ufsiget;
219 219 kstat_named_t ufsdirblk;
220 220 kstat_named_t ufsipage;
221 221 kstat_named_t ufsinopage;
222 222 kstat_named_t procovf;
223 223 kstat_named_t intrthread;
224 224 kstat_named_t intrblk;
225 225 kstat_named_t intrunpin;
226 226 kstat_named_t idlethread;
227 227 kstat_named_t inv_swtch;
228 228 kstat_named_t nthreads;
229 229 kstat_named_t cpumigrate;
230 230 kstat_named_t xcalls;
231 231 kstat_named_t mutex_adenters;
232 232 kstat_named_t rw_rdfails;
233 233 kstat_named_t rw_wrfails;
234 234 kstat_named_t modload;
235 235 kstat_named_t modunload;
236 236 kstat_named_t bawrite;
237 237 kstat_named_t iowait;
238 238 } cpu_sys_stats_ks_data_template = {
239 239 { "cpu_ticks_idle", KSTAT_DATA_UINT64 },
240 240 { "cpu_ticks_user", KSTAT_DATA_UINT64 },
241 241 { "cpu_ticks_kernel", KSTAT_DATA_UINT64 },
242 242 { "cpu_ticks_wait", KSTAT_DATA_UINT64 },
243 243 { "cpu_nsec_idle", KSTAT_DATA_UINT64 },
244 244 { "cpu_nsec_user", KSTAT_DATA_UINT64 },
245 245 { "cpu_nsec_kernel", KSTAT_DATA_UINT64 },
246 246 { "cpu_nsec_dtrace", KSTAT_DATA_UINT64 },
247 247 { "cpu_nsec_intr", KSTAT_DATA_UINT64 },
248 248 { "cpu_load_intr", KSTAT_DATA_UINT64 },
249 249 { "wait_ticks_io", KSTAT_DATA_UINT64 },
250 250 { "dtrace_probes", KSTAT_DATA_UINT64 },
251 251 { "bread", KSTAT_DATA_UINT64 },
252 252 { "bwrite", KSTAT_DATA_UINT64 },
253 253 { "lread", KSTAT_DATA_UINT64 },
254 254 { "lwrite", KSTAT_DATA_UINT64 },
255 255 { "phread", KSTAT_DATA_UINT64 },
256 256 { "phwrite", KSTAT_DATA_UINT64 },
257 257 { "pswitch", KSTAT_DATA_UINT64 },
258 258 { "trap", KSTAT_DATA_UINT64 },
259 259 { "intr", KSTAT_DATA_UINT64 },
260 260 { "syscall", KSTAT_DATA_UINT64 },
261 261 { "sysread", KSTAT_DATA_UINT64 },
262 262 { "syswrite", KSTAT_DATA_UINT64 },
263 263 { "sysfork", KSTAT_DATA_UINT64 },
264 264 { "sysvfork", KSTAT_DATA_UINT64 },
265 265 { "sysexec", KSTAT_DATA_UINT64 },
266 266 { "readch", KSTAT_DATA_UINT64 },
267 267 { "writech", KSTAT_DATA_UINT64 },
268 268 { "rcvint", KSTAT_DATA_UINT64 },
269 269 { "xmtint", KSTAT_DATA_UINT64 },
270 270 { "mdmint", KSTAT_DATA_UINT64 },
271 271 { "rawch", KSTAT_DATA_UINT64 },
272 272 { "canch", KSTAT_DATA_UINT64 },
273 273 { "outch", KSTAT_DATA_UINT64 },
274 274 { "msg", KSTAT_DATA_UINT64 },
275 275 { "sema", KSTAT_DATA_UINT64 },
276 276 { "namei", KSTAT_DATA_UINT64 },
277 277 { "ufsiget", KSTAT_DATA_UINT64 },
278 278 { "ufsdirblk", KSTAT_DATA_UINT64 },
279 279 { "ufsipage", KSTAT_DATA_UINT64 },
280 280 { "ufsinopage", KSTAT_DATA_UINT64 },
281 281 { "procovf", KSTAT_DATA_UINT64 },
282 282 { "intrthread", KSTAT_DATA_UINT64 },
283 283 { "intrblk", KSTAT_DATA_UINT64 },
284 284 { "intrunpin", KSTAT_DATA_UINT64 },
285 285 { "idlethread", KSTAT_DATA_UINT64 },
286 286 { "inv_swtch", KSTAT_DATA_UINT64 },
287 287 { "nthreads", KSTAT_DATA_UINT64 },
288 288 { "cpumigrate", KSTAT_DATA_UINT64 },
289 289 { "xcalls", KSTAT_DATA_UINT64 },
290 290 { "mutex_adenters", KSTAT_DATA_UINT64 },
291 291 { "rw_rdfails", KSTAT_DATA_UINT64 },
292 292 { "rw_wrfails", KSTAT_DATA_UINT64 },
293 293 { "modload", KSTAT_DATA_UINT64 },
294 294 { "modunload", KSTAT_DATA_UINT64 },
295 295 { "bawrite", KSTAT_DATA_UINT64 },
296 296 { "iowait", KSTAT_DATA_UINT64 },
297 297 };
298 298
299 299 static struct cpu_vm_stats_ks_data {
300 300 kstat_named_t pgrec;
301 301 kstat_named_t pgfrec;
302 302 kstat_named_t pgin;
303 303 kstat_named_t pgpgin;
304 304 kstat_named_t pgout;
305 305 kstat_named_t pgpgout;
306 306 kstat_named_t swapin;
307 307 kstat_named_t pgswapin;
308 308 kstat_named_t swapout;
309 309 kstat_named_t pgswapout;
310 310 kstat_named_t zfod;
311 311 kstat_named_t dfree;
312 312 kstat_named_t scan;
313 313 kstat_named_t rev;
314 314 kstat_named_t hat_fault;
315 315 kstat_named_t as_fault;
316 316 kstat_named_t maj_fault;
317 317 kstat_named_t cow_fault;
318 318 kstat_named_t prot_fault;
319 319 kstat_named_t softlock;
320 320 kstat_named_t kernel_asflt;
321 321 kstat_named_t pgrrun;
322 322 kstat_named_t execpgin;
323 323 kstat_named_t execpgout;
324 324 kstat_named_t execfree;
325 325 kstat_named_t anonpgin;
326 326 kstat_named_t anonpgout;
327 327 kstat_named_t anonfree;
328 328 kstat_named_t fspgin;
329 329 kstat_named_t fspgout;
330 330 kstat_named_t fsfree;
331 331 } cpu_vm_stats_ks_data_template = {
332 332 { "pgrec", KSTAT_DATA_UINT64 },
333 333 { "pgfrec", KSTAT_DATA_UINT64 },
334 334 { "pgin", KSTAT_DATA_UINT64 },
335 335 { "pgpgin", KSTAT_DATA_UINT64 },
336 336 { "pgout", KSTAT_DATA_UINT64 },
337 337 { "pgpgout", KSTAT_DATA_UINT64 },
338 338 { "swapin", KSTAT_DATA_UINT64 },
339 339 { "pgswapin", KSTAT_DATA_UINT64 },
340 340 { "swapout", KSTAT_DATA_UINT64 },
341 341 { "pgswapout", KSTAT_DATA_UINT64 },
342 342 { "zfod", KSTAT_DATA_UINT64 },
343 343 { "dfree", KSTAT_DATA_UINT64 },
344 344 { "scan", KSTAT_DATA_UINT64 },
345 345 { "rev", KSTAT_DATA_UINT64 },
346 346 { "hat_fault", KSTAT_DATA_UINT64 },
347 347 { "as_fault", KSTAT_DATA_UINT64 },
348 348 { "maj_fault", KSTAT_DATA_UINT64 },
349 349 { "cow_fault", KSTAT_DATA_UINT64 },
350 350 { "prot_fault", KSTAT_DATA_UINT64 },
351 351 { "softlock", KSTAT_DATA_UINT64 },
352 352 { "kernel_asflt", KSTAT_DATA_UINT64 },
353 353 { "pgrrun", KSTAT_DATA_UINT64 },
354 354 { "execpgin", KSTAT_DATA_UINT64 },
355 355 { "execpgout", KSTAT_DATA_UINT64 },
356 356 { "execfree", KSTAT_DATA_UINT64 },
357 357 { "anonpgin", KSTAT_DATA_UINT64 },
358 358 { "anonpgout", KSTAT_DATA_UINT64 },
359 359 { "anonfree", KSTAT_DATA_UINT64 },
360 360 { "fspgin", KSTAT_DATA_UINT64 },
361 361 { "fspgout", KSTAT_DATA_UINT64 },
362 362 { "fsfree", KSTAT_DATA_UINT64 },
363 363 };
364 364
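
These templates back the per-CPU "sys" and "vm" kstats published later by cpu_stats_kstat_create(). As a rough userland sketch (assuming the standard libkstat(3LIB) calls and the conventional "cpu" module/"sys" name; the program itself is hypothetical), one of the counters above can be read like this:

/* Hypothetical example program, not part of the kernel sources. */
#include <kstat.h>
#include <stdio.h>

int
main(void)
{
	kstat_ctl_t *kc;
	kstat_t *ksp;
	kstat_named_t *kn;

	if ((kc = kstat_open()) == NULL)
		return (1);
	/* module "cpu", instance 0, name "sys" -- see the template above */
	if ((ksp = kstat_lookup(kc, "cpu", 0, "sys")) == NULL ||
	    kstat_read(kc, ksp, NULL) == -1) {
		(void) kstat_close(kc);
		return (1);
	}
	if ((kn = kstat_data_lookup(ksp, "cpu_nsec_idle")) != NULL)
		(void) printf("cpu0 idle nsec: %llu\n",
		    (u_longlong_t)kn->value.ui64);
	(void) kstat_close(kc);
	return (0);
}
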
365 365 /*
366 366 * Force the specified thread to migrate to the appropriate processor.
367 367 * Called with thread lock held, returns with it dropped.
368 368 */
369 369 static void
370 370 force_thread_migrate(kthread_id_t tp)
371 371 {
372 372 ASSERT(THREAD_LOCK_HELD(tp));
373 373 if (tp == curthread) {
374 374 THREAD_TRANSITION(tp);
375 375 CL_SETRUN(tp);
376 376 thread_unlock_nopreempt(tp);
377 377 swtch();
378 378 } else {
379 379 if (tp->t_state == TS_ONPROC) {
380 380 cpu_surrender(tp);
381 381 } else if (tp->t_state == TS_RUN) {
382 382 (void) dispdeq(tp);
383 383 setbackdq(tp);
384 384 }
385 385 thread_unlock(tp);
386 386 }
387 387 }
388 388
389 389 /*
390 390 * Set affinity for a specified CPU.
391 391 *
392 392 * Specifying a cpu_id of CPU_CURRENT, allowed _only_ when setting affinity for
393 393 * curthread, will set affinity to the CPU on which the thread is currently
394 394 * running. For other cpu_id values, the caller must ensure that the
395 395 * referenced CPU remains valid, which can be done by holding cpu_lock across
396 396 * this call.
397 397 *
398 398 * CPU affinity is guaranteed after return of thread_affinity_set(). If a
399 399 * caller setting affinity to CPU_CURRENT requires that its thread not migrate
400 400 * CPUs prior to a successful return, it should take extra precautions (such as
401 401 * their own call to kpreempt_disable) to ensure that safety.
402 402 *
403 + * CPU_BEST can be used to pick a "best" CPU to migrate to, including
404 + * potentially the current CPU.
405 + *
403 406 * A CPU affinity reference count is maintained by thread_affinity_set and
404 407 * thread_affinity_clear (incrementing and decrementing it, respectively),
405 408 * maintaining CPU affinity while the count is non-zero, and allowing regions
406 409 * of code which require affinity to be nested.
407 410 */
408 411 void
409 412 thread_affinity_set(kthread_id_t t, int cpu_id)
410 413 {
411 414 cpu_t *cp;
412 415
413 416 ASSERT(!(t == curthread && t->t_weakbound_cpu != NULL));
414 417
415 418 if (cpu_id == CPU_CURRENT) {
416 419 VERIFY3P(t, ==, curthread);
417 420 kpreempt_disable();
418 421 cp = CPU;
422 + } else if (cpu_id == CPU_BEST) {
423 + VERIFY3P(t, ==, curthread);
424 + kpreempt_disable();
425 + cp = disp_choose_best_cpu();
419 426 } else {
420 427 /*
421 428 * We should be asserting that cpu_lock is held here, but
422 429 * the NCA code doesn't acquire it. The following assert
423 430 * should be uncommented when the NCA code is fixed.
424 431 *
425 432 * ASSERT(MUTEX_HELD(&cpu_lock));
426 433 */
427 434 VERIFY((cpu_id >= 0) && (cpu_id < NCPU));
428 435 cp = cpu[cpu_id];
429 436
430 437 /* user must provide a good cpu_id */
431 438 VERIFY(cp != NULL);
432 439 }
433 440
434 441 /*
435 442 * If there is already a hard affinity requested, and this affinity
436 443 * conflicts with that, panic.
437 444 */
438 445 thread_lock(t);
439 446 if (t->t_affinitycnt > 0 && t->t_bound_cpu != cp) {
440 447 panic("affinity_set: setting %p but already bound to %p",
441 448 (void *)cp, (void *)t->t_bound_cpu);
442 449 }
443 450 t->t_affinitycnt++;
444 451 t->t_bound_cpu = cp;
445 452
446 453 /*
447 454 * Make sure we're running on the right CPU.
448 455 */
449 456 if (cp != t->t_cpu || t != curthread) {
450 457 ASSERT(cpu_id != CPU_CURRENT);
451 458 force_thread_migrate(t); /* drops thread lock */
452 459 } else {
453 460 thread_unlock(t);
454 461 }
455 462
456 - if (cpu_id == CPU_CURRENT) {
463 + if (cpu_id == CPU_CURRENT || cpu_id == CPU_BEST)
457 464 kpreempt_enable();
458 - }
459 465 }
460 466
461 467 /*
462 468 * Wrapper for backward compatibility.
463 469 */
464 470 void
465 471 affinity_set(int cpu_id)
466 472 {
467 473 thread_affinity_set(curthread, cpu_id);
468 474 }
469 475
470 476 /*
471 477 * Decrement the affinity reservation count and if it becomes zero,
472 478 * clear the CPU affinity for the current thread, or set it to the user's
473 479 * software binding request.
474 480 */
475 481 void
476 482 thread_affinity_clear(kthread_id_t t)
477 483 {
478 484 register processorid_t binding;
479 485
480 486 thread_lock(t);
481 487 if (--t->t_affinitycnt == 0) {
482 488 if ((binding = t->t_bind_cpu) == PBIND_NONE) {
483 489 /*
484 490 * Adjust disp_max_unbound_pri if necessary.
485 491 */
486 492 disp_adjust_unbound_pri(t);
487 493 t->t_bound_cpu = NULL;
488 494 if (t->t_cpu->cpu_part != t->t_cpupart) {
489 495 force_thread_migrate(t);
490 496 return;
491 497 }
492 498 } else {
493 499 t->t_bound_cpu = cpu[binding];
494 500 /*
495 501 * Make sure the thread is running on the bound CPU.
496 502 */
497 503 if (t->t_cpu != t->t_bound_cpu) {
498 504 force_thread_migrate(t);
499 505 return; /* already dropped lock */
500 506 }
501 507 }
502 508 }
503 509 thread_unlock(t);
504 510 }
505 511
506 512 /*
507 513 * Wrapper for backward compatibility.
508 514 */
509 515 void
510 516 affinity_clear(void)
511 517 {
512 518 thread_affinity_clear(curthread);
513 519 }
514 520
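
A small sketch of how the reference-counted affinity interfaces above are typically used; do_percpu_work() is a hypothetical helper, and CPU_BEST is the new option introduced by this change:

/*
 * Sketch: keep curthread on one CPU for a short critical section.
 * do_percpu_work() is a hypothetical helper.  Nested
 * thread_affinity_set()/thread_affinity_clear() pairs are fine
 * because of the t_affinitycnt reference count.
 */
static void
affinity_example(void)
{
	/* Let the dispatcher pick a target (possibly the current CPU). */
	thread_affinity_set(curthread, CPU_BEST);
	do_percpu_work(CPU);		/* CPU cannot change underneath us */
	thread_affinity_clear(curthread);
}
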
515 521 /*
516 522 * Weak cpu affinity. Bind to the "current" cpu for short periods
517 523 * of time during which the thread must not block (but may be preempted).
518 524 * Use this instead of kpreempt_disable() when it is only "no migration"
519 525 * rather than "no preemption" semantics that are required - disabling
520 526 * preemption holds higher priority threads off of cpu and if the
521 527 * operation that is protected is more than momentary this is not good
522 528 * for realtime etc.
523 529 *
524 530 * Weakly bound threads will not prevent a cpu from being offlined -
525 531 * we'll only run them on the cpu to which they are weakly bound but
526 532 * (because they do not block) we'll always be able to move them on to
527 533 * another cpu at offline time if we give them just a short moment to
528 534 * run during which they will unbind. To give a cpu a chance of offlining,
529 535 * however, we require a barrier to weak bindings that may be raised for a
530 536 * given cpu (offline/move code may set this and then wait a short time for
531 537 * existing weak bindings to drop); the cpu_inmotion pointer is that barrier.
532 538 *
533 539 * There are few restrictions on the calling context of thread_nomigrate.
534 540 * The caller must not hold the thread lock. Calls may be nested.
535 541 *
536 542 * After weakbinding a thread must not perform actions that may block.
537 543 * In particular it must not call thread_affinity_set; calling that when
538 544 * already weakbound is nonsensical anyway.
539 545 *
540 546 * If curthread is prevented from migrating for other reasons
541 547 * (kernel preemption disabled; high pil; strongly bound; interrupt thread)
542 548 * then the weak binding will succeed even if this cpu is the target of an
543 549 * offline/move request.
544 550 */
545 551 void
546 552 thread_nomigrate(void)
547 553 {
548 554 cpu_t *cp;
549 555 kthread_id_t t = curthread;
550 556
551 557 again:
552 558 kpreempt_disable();
553 559 cp = CPU;
554 560
555 561 /*
556 562 * A highlevel interrupt must not modify t_nomigrate or
557 563 * t_weakbound_cpu of the thread it has interrupted. A lowlevel
558 564 * interrupt thread cannot migrate and we can avoid the
559 565 * thread_lock call below by short-circuiting here. In either
560 566 * case we can just return since no migration is possible and
561 567 * the condition will persist (ie, when we test for these again
562 568 * in thread_allowmigrate they can't have changed). Migration
563 569 * is also impossible if we're at or above DISP_LEVEL pil.
564 570 */
565 571 if (CPU_ON_INTR(cp) || t->t_flag & T_INTR_THREAD ||
566 572 getpil() >= DISP_LEVEL) {
567 573 kpreempt_enable();
568 574 return;
569 575 }
570 576
571 577 /*
572 578 * We must be consistent with existing weak bindings. Since we
573 579 * may be interrupted between the increment of t_nomigrate and
574 580 * the store to t_weakbound_cpu below we cannot assume that
575 581 * t_weakbound_cpu will be set if t_nomigrate is. Note that we
576 582 * cannot assert t_weakbound_cpu == t_bind_cpu since that is not
577 583 * always the case.
578 584 */
579 585 if (t->t_nomigrate && t->t_weakbound_cpu && t->t_weakbound_cpu != cp) {
580 586 if (!panicstr)
581 587 panic("thread_nomigrate: binding to %p but already "
582 588 "bound to %p", (void *)cp,
583 589 (void *)t->t_weakbound_cpu);
584 590 }
585 591
586 592 /*
587 593 * At this point we have preemption disabled and we don't yet hold
588 594 * the thread lock. So it's possible that somebody else could
589 595 * set t_bind_cpu here and not be able to force us across to the
590 596 * new cpu (since we have preemption disabled).
591 597 */
592 598 thread_lock(curthread);
593 599
594 600 /*
595 601 * If further weak bindings are being (temporarily) suppressed then
596 602 * we'll settle for disabling kernel preemption (which assures
597 603 * no migration provided the thread does not block which it is
598 604 * not allowed to if using thread_nomigrate). We must remember
599 605 * this disposition so we can take appropriate action in
600 606 * thread_allowmigrate. If this is a nested call and the
601 607 * thread is already weakbound then fall through as normal.
602 608 * We remember the decision to settle for kpreempt_disable through
603 609 * negative nesting counting in t_nomigrate. Once a thread has had one
604 610 * weakbinding request satisfied in this way any further (nested)
605 611 * requests will continue to be satisfied in the same way,
606 612 * even if weak bindings have recommenced.
607 613 */
608 614 if (t->t_nomigrate < 0 || weakbindingbarrier && t->t_nomigrate == 0) {
609 615 --t->t_nomigrate;
610 616 thread_unlock(curthread);
611 617 return; /* with kpreempt_disable still active */
612 618 }
613 619
614 620 /*
615 621 * We hold thread_lock so t_bind_cpu cannot change. We could,
616 622 * however, be running on a different cpu to which we are t_bound_cpu
617 623 * to (as explained above). If we grant the weak binding request
618 624 * in that case then the dispatcher must favour our weak binding
619 625 * over our strong (in which case, just as when preemption is
620 626 * disabled, we can continue to run on a cpu other than the one to
621 627 * which we are strongbound; the difference in this case is that
622 628 * this thread can be preempted and so can appear on the dispatch
623 629 * queues of a cpu other than the one it is strongbound to).
624 630 *
625 631 * If the cpu we are running on does not appear to be a current
626 632 * offline target (we check cpu_inmotion to determine this - since
627 633 * we don't hold cpu_lock we may not see a recent store to that,
628 634 * so it's possible that we at times can grant a weak binding to a
629 635 * cpu that is an offline target, but that one request will not
630 636 * prevent the offline from succeeding) then we will always grant
631 637 * the weak binding request. This includes the case above where
632 638 * we grant a weakbinding not commensurate with our strong binding.
633 639 *
634 640 * If our cpu does appear to be an offline target then we're inclined
635 641 * not to grant the weakbinding request just yet - we'd prefer to
636 642 * migrate to another cpu and grant the request there. The
637 643 * exceptions are those cases where going through preemption code
638 644 * will not result in us changing cpu:
639 645 *
640 646 * . interrupts have already bypassed this case (see above)
641 647 * . we are already weakbound to this cpu (dispatcher code will
642 648 * always return us to the weakbound cpu)
643 649 * . preemption was disabled even before we disabled it above
644 650 * . we are strongbound to this cpu (if we're strongbound to
645 651 * another and not yet running there the trip through the
646 652 * dispatcher will move us to the strongbound cpu and we
647 653 * will grant the weak binding there)
648 654 */
649 655 if (cp != cpu_inmotion || t->t_nomigrate > 0 || t->t_preempt > 1 ||
650 656 t->t_bound_cpu == cp) {
651 657 /*
652 658 * Don't be tempted to store to t_weakbound_cpu only on
653 659 * the first nested bind request - if we're interrupted
654 660 * after the increment of t_nomigrate and before the
655 661 * store to t_weakbound_cpu and the interrupt calls
656 662 * thread_nomigrate then the assertion in thread_allowmigrate
657 663 * would fail.
658 664 */
659 665 t->t_nomigrate++;
660 666 t->t_weakbound_cpu = cp;
661 667 membar_producer();
662 668 thread_unlock(curthread);
663 669 /*
664 670 * Now that we have dropped the thread_lock another thread
665 671 * can set our t_weakbound_cpu, and will try to migrate us
666 672 * to the strongbound cpu (which will not be prevented by
667 673 * preemption being disabled since we're about to enable
668 674 * preemption). We have granted the weakbinding to the current
 669 675  * cpu, so again we are in the position that it is possible
670 676 * that our weak and strong bindings differ. Again this
671 677 * is catered for by dispatcher code which will favour our
672 678 * weak binding.
673 679 */
674 680 kpreempt_enable();
675 681 } else {
676 682 /*
677 683 * Move to another cpu before granting the request by
678 684 * forcing this thread through preemption code. When we
679 685 * get to set{front,back}dq called from CL_PREEMPT()
680 686 * cpu_choose() will be used to select a cpu to queue
681 687 * us on - that will see cpu_inmotion and take
682 688 * steps to avoid returning us to this cpu.
683 689 */
684 690 cp->cpu_kprunrun = 1;
685 691 thread_unlock(curthread);
686 692 kpreempt_enable(); /* will call preempt() */
687 693 goto again;
688 694 }
689 695 }
690 696
691 697 void
692 698 thread_allowmigrate(void)
693 699 {
694 700 kthread_id_t t = curthread;
695 701
696 702 ASSERT(t->t_weakbound_cpu == CPU ||
697 703 (t->t_nomigrate < 0 && t->t_preempt > 0) ||
698 704 CPU_ON_INTR(CPU) || t->t_flag & T_INTR_THREAD ||
699 705 getpil() >= DISP_LEVEL);
700 706
701 707 if (CPU_ON_INTR(CPU) || (t->t_flag & T_INTR_THREAD) ||
702 708 getpil() >= DISP_LEVEL)
703 709 return;
704 710
705 711 if (t->t_nomigrate < 0) {
706 712 /*
707 713 * This thread was granted "weak binding" in the
708 714 * stronger form of kernel preemption disabling.
709 715 * Undo a level of nesting for both t_nomigrate
710 716 * and t_preempt.
711 717 */
712 718 ++t->t_nomigrate;
713 719 kpreempt_enable();
714 720 } else if (--t->t_nomigrate == 0) {
715 721 /*
716 722 * Time to drop the weak binding. We need to cater
717 723 * for the case where we're weakbound to a different
718 724 * cpu than that to which we're strongbound (a very
719 725 * temporary arrangement that must only persist until
720 726 * weak binding drops). We don't acquire thread_lock
721 727 * here so even as this code executes t_bound_cpu
722 728 * may be changing. So we disable preemption and
723 729 * a) in the case that t_bound_cpu changes while we
724 730 * have preemption disabled kprunrun will be set
725 731 * asynchronously, and b) if before disabling
726 732 * preemption we were already on a different cpu to
727 733 * our t_bound_cpu then we set kprunrun ourselves
728 734 * to force a trip through the dispatcher when
729 735 * preemption is enabled.
730 736 */
731 737 kpreempt_disable();
732 738 if (t->t_bound_cpu &&
733 739 t->t_weakbound_cpu != t->t_bound_cpu)
734 740 CPU->cpu_kprunrun = 1;
735 741 t->t_weakbound_cpu = NULL;
736 742 membar_producer();
737 743 kpreempt_enable();
738 744 }
739 745 }
740 746
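
For review context, a minimal sketch of the weak-binding pattern the comments above describe; the per-CPU counter array is a hypothetical caller-provided structure. The thread stays on its CPU but remains preemptable, and it must not block inside the region:

/*
 * Sketch: read a per-CPU value without migrating.  percpu_counters is
 * a hypothetical caller-provided array indexed by cpu_seqid.
 */
static uint64_t
read_this_cpus_counter(uint64_t *percpu_counters)
{
	uint64_t val;

	thread_nomigrate();		/* no migration, preemption still OK */
	val = percpu_counters[CPU->cpu_seqid];
	thread_allowmigrate();
	return (val);
}
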
741 747 /*
742 748 * weakbinding_stop can be used to temporarily cause weakbindings made
743 749 * with thread_nomigrate to be satisfied through the stronger action of
744 750 * kpreempt_disable. weakbinding_start recommences normal weakbinding.
745 751 */
746 752
747 753 void
748 754 weakbinding_stop(void)
749 755 {
750 756 ASSERT(MUTEX_HELD(&cpu_lock));
751 757 weakbindingbarrier = 1;
752 758 membar_producer(); /* make visible before subsequent thread_lock */
753 759 }
754 760
755 761 void
756 762 weakbinding_start(void)
757 763 {
758 764 ASSERT(MUTEX_HELD(&cpu_lock));
759 765 weakbindingbarrier = 0;
760 766 }
761 767
762 768 void
763 769 null_xcall(void)
764 770 {
765 771 }
766 772
767 773 /*
768 774 * This routine is called to place the CPUs in a safe place so that
769 775 * one of them can be taken off line or placed on line. What we are
770 776 * trying to do here is prevent a thread from traversing the list
771 777 * of active CPUs while we are changing it or from getting placed on
772 778 * the run queue of a CPU that has just gone off line. We do this by
773 779 * creating a thread with the highest possible prio for each CPU and
774 780 * having it call this routine. The advantage of this method is that
775 781 * we can eliminate all checks for CPU_ACTIVE in the disp routines.
776 782 * This makes disp faster at the expense of making p_online() slower
777 783 * which is a good trade off.
778 784 */
779 785 static void
780 786 cpu_pause(int index)
781 787 {
782 788 int s;
783 789 struct _cpu_pause_info *cpi = &cpu_pause_info;
784 790 volatile char *safe = &safe_list[index];
785 791 long lindex = index;
786 792
787 793 ASSERT((curthread->t_bound_cpu != NULL) || (*safe == PAUSE_DIE));
788 794
789 795 while (*safe != PAUSE_DIE) {
790 796 *safe = PAUSE_READY;
791 797 membar_enter(); /* make sure stores are flushed */
792 798 sema_v(&cpi->cp_sem); /* signal requesting thread */
793 799
794 800 /*
795 801 * Wait here until all pause threads are running. That
796 802 * indicates that it's safe to do the spl. Until
797 803 * cpu_pause_info.cp_go is set, we don't want to spl
798 804 * because that might block clock interrupts needed
799 805 * to preempt threads on other CPUs.
800 806 */
801 807 while (cpi->cp_go == 0)
802 808 ;
803 809 /*
804 810 * Even though we are at the highest disp prio, we need
805 811 * to block out all interrupts below LOCK_LEVEL so that
806 812 * an intr doesn't come in, wake up a thread, and call
807 813 * setbackdq/setfrontdq.
808 814 */
809 815 s = splhigh();
810 816 /*
 811 817  * If cp_func has been set then call it using index as the
 812 818  * argument; currently this is only used by cpr_suspend_cpus().
 813 819  * This function is used as the code to execute on the
 814 820  * "paused" CPUs when a machine comes out of a sleep state
 815 821  * and CPUs were powered off (it could also be used for
 816 822  * hotplugging CPUs).
817 823 */
818 824 if (cpi->cp_func != NULL)
819 825 (*cpi->cp_func)((void *)lindex);
820 826
821 827 mach_cpu_pause(safe);
822 828
823 829 splx(s);
824 830 /*
825 831 * Waiting is at an end. Switch out of cpu_pause
826 832 * loop and resume useful work.
827 833 */
828 834 swtch();
829 835 }
830 836
831 837 mutex_enter(&pause_free_mutex);
832 838 *safe = PAUSE_DEAD;
833 839 cv_broadcast(&pause_free_cv);
834 840 mutex_exit(&pause_free_mutex);
835 841 }
836 842
837 843 /*
838 844 * Allow the cpus to start running again.
839 845 */
840 846 void
841 847 start_cpus()
842 848 {
843 849 int i;
844 850
845 851 ASSERT(MUTEX_HELD(&cpu_lock));
846 852 ASSERT(cpu_pause_info.cp_paused);
847 853 cpu_pause_info.cp_paused = NULL;
848 854 for (i = 0; i < NCPU; i++)
849 855 safe_list[i] = PAUSE_IDLE;
850 856 membar_enter(); /* make sure stores are flushed */
851 857 affinity_clear();
852 858 splx(cpu_pause_info.cp_spl);
853 859 kpreempt_enable();
854 860 }
855 861
856 862 /*
857 863 * Allocate a pause thread for a CPU.
858 864 */
859 865 static void
860 866 cpu_pause_alloc(cpu_t *cp)
861 867 {
862 868 kthread_id_t t;
863 869 long cpun = cp->cpu_id;
864 870
865 871 /*
866 872 * Note, v.v_nglobpris will not change value as long as I hold
867 873 * cpu_lock.
868 874 */
869 875 t = thread_create(NULL, 0, cpu_pause, (void *)cpun,
870 876 0, &p0, TS_STOPPED, v.v_nglobpris - 1);
871 877 thread_lock(t);
872 878 t->t_bound_cpu = cp;
873 879 t->t_disp_queue = cp->cpu_disp;
874 880 t->t_affinitycnt = 1;
875 881 t->t_preempt = 1;
876 882 thread_unlock(t);
877 883 cp->cpu_pause_thread = t;
878 884 /*
879 885 * Registering a thread in the callback table is usually done
880 886 * in the initialization code of the thread. In this
881 887 * case, we do it right after thread creation because the
882 888 * thread itself may never run, and we need to register the
883 889 * fact that it is safe for cpr suspend.
884 890 */
885 891 CALLB_CPR_INIT_SAFE(t, "cpu_pause");
886 892 }
887 893
888 894 /*
889 895 * Free a pause thread for a CPU.
890 896 */
891 897 static void
892 898 cpu_pause_free(cpu_t *cp)
893 899 {
894 900 kthread_id_t t;
895 901 int cpun = cp->cpu_id;
896 902
897 903 ASSERT(MUTEX_HELD(&cpu_lock));
898 904 /*
899 905 * We have to get the thread and tell it to die.
900 906 */
901 907 if ((t = cp->cpu_pause_thread) == NULL) {
902 908 ASSERT(safe_list[cpun] == PAUSE_IDLE);
903 909 return;
904 910 }
905 911 thread_lock(t);
906 912 t->t_cpu = CPU; /* disp gets upset if last cpu is quiesced. */
907 913 t->t_bound_cpu = NULL; /* Must un-bind; cpu may not be running. */
908 914 t->t_pri = v.v_nglobpris - 1;
909 915 ASSERT(safe_list[cpun] == PAUSE_IDLE);
910 916 safe_list[cpun] = PAUSE_DIE;
911 917 THREAD_TRANSITION(t);
912 918 setbackdq(t);
913 919 thread_unlock_nopreempt(t);
914 920
915 921 /*
916 922 * If we don't wait for the thread to actually die, it may try to
917 923 * run on the wrong cpu as part of an actual call to pause_cpus().
918 924 */
919 925 mutex_enter(&pause_free_mutex);
920 926 while (safe_list[cpun] != PAUSE_DEAD) {
921 927 cv_wait(&pause_free_cv, &pause_free_mutex);
922 928 }
923 929 mutex_exit(&pause_free_mutex);
924 930 safe_list[cpun] = PAUSE_IDLE;
925 931
926 932 cp->cpu_pause_thread = NULL;
927 933 }
928 934
929 935 /*
930 936 * Initialize basic structures for pausing CPUs.
931 937 */
932 938 void
933 939 cpu_pause_init()
934 940 {
935 941 sema_init(&cpu_pause_info.cp_sem, 0, NULL, SEMA_DEFAULT, NULL);
936 942 /*
937 943 * Create initial CPU pause thread.
938 944 */
939 945 cpu_pause_alloc(CPU);
940 946 }
941 947
942 948 /*
943 949 * Start the threads used to pause another CPU.
944 950 */
945 951 static int
946 952 cpu_pause_start(processorid_t cpu_id)
947 953 {
948 954 int i;
949 955 int cpu_count = 0;
950 956
951 957 for (i = 0; i < NCPU; i++) {
952 958 cpu_t *cp;
953 959 kthread_id_t t;
954 960
955 961 cp = cpu[i];
956 962 if (!CPU_IN_SET(cpu_available, i) || (i == cpu_id)) {
957 963 safe_list[i] = PAUSE_WAIT;
958 964 continue;
959 965 }
960 966
961 967 /*
962 968 * Skip CPU if it is quiesced or not yet started.
963 969 */
964 970 if ((cp->cpu_flags & (CPU_QUIESCED | CPU_READY)) != CPU_READY) {
965 971 safe_list[i] = PAUSE_WAIT;
966 972 continue;
967 973 }
968 974
969 975 /*
970 976 * Start this CPU's pause thread.
971 977 */
972 978 t = cp->cpu_pause_thread;
973 979 thread_lock(t);
974 980 /*
975 981 * Reset the priority, since nglobpris may have
976 982 * changed since the thread was created, if someone
977 983 * has loaded the RT (or some other) scheduling
978 984 * class.
979 985 */
980 986 t->t_pri = v.v_nglobpris - 1;
981 987 THREAD_TRANSITION(t);
982 988 setbackdq(t);
983 989 thread_unlock_nopreempt(t);
984 990 ++cpu_count;
985 991 }
986 992 return (cpu_count);
987 993 }
988 994
989 995
990 996 /*
991 997 * Pause all of the CPUs except the one we are on by creating a high
992 998 * priority thread bound to those CPUs.
993 999 *
994 1000 * Note that one must be extremely careful regarding code
995 1001 * executed while CPUs are paused. Since a CPU may be paused
996 1002 * while a thread scheduling on that CPU is holding an adaptive
997 1003 * lock, code executed with CPUs paused must not acquire adaptive
998 1004 * (or low-level spin) locks. Also, such code must not block,
999 1005 * since the thread that is supposed to initiate the wakeup may
1000 1006 * never run.
1001 1007 *
1002 1008 * With a few exceptions, the restrictions on code executed with CPUs
1003 1009 * paused match those for code executed at high-level interrupt
1004 1010 * context.
1005 1011 */
1006 1012 void
1007 1013 pause_cpus(cpu_t *off_cp, void *(*func)(void *))
1008 1014 {
1009 1015 processorid_t cpu_id;
1010 1016 int i;
1011 1017 struct _cpu_pause_info *cpi = &cpu_pause_info;
1012 1018
1013 1019 ASSERT(MUTEX_HELD(&cpu_lock));
1014 1020 ASSERT(cpi->cp_paused == NULL);
1015 1021 cpi->cp_count = 0;
1016 1022 cpi->cp_go = 0;
1017 1023 for (i = 0; i < NCPU; i++)
1018 1024 safe_list[i] = PAUSE_IDLE;
1019 1025 kpreempt_disable();
1020 1026
1021 1027 cpi->cp_func = func;
1022 1028
1023 1029 /*
1024 1030 * If running on the cpu that is going offline, get off it.
1025 1031 * This is so that it won't be necessary to rechoose a CPU
1026 1032 * when done.
1027 1033 */
1028 1034 if (CPU == off_cp)
1029 1035 cpu_id = off_cp->cpu_next_part->cpu_id;
1030 1036 else
1031 1037 cpu_id = CPU->cpu_id;
1032 1038 affinity_set(cpu_id);
1033 1039
1034 1040 /*
1035 1041 * Start the pause threads and record how many were started
1036 1042 */
1037 1043 cpi->cp_count = cpu_pause_start(cpu_id);
1038 1044
1039 1045 /*
1040 1046 * Now wait for all CPUs to be running the pause thread.
1041 1047 */
1042 1048 while (cpi->cp_count > 0) {
1043 1049 /*
1044 1050 * Spin reading the count without grabbing the disp
1045 1051 * lock to make sure we don't prevent the pause
1046 1052 * threads from getting the lock.
1047 1053 */
1048 1054 while (sema_held(&cpi->cp_sem))
1049 1055 ;
1050 1056 if (sema_tryp(&cpi->cp_sem))
1051 1057 --cpi->cp_count;
1052 1058 }
1053 1059 cpi->cp_go = 1; /* all have reached cpu_pause */
1054 1060
1055 1061 /*
1056 1062 * Now wait for all CPUs to spl. (Transition from PAUSE_READY
1057 1063 * to PAUSE_WAIT.)
1058 1064 */
1059 1065 for (i = 0; i < NCPU; i++) {
1060 1066 while (safe_list[i] != PAUSE_WAIT)
1061 1067 ;
1062 1068 }
1063 1069 cpi->cp_spl = splhigh(); /* block dispatcher on this CPU */
1064 1070 cpi->cp_paused = curthread;
1065 1071 }
1066 1072
1067 1073 /*
1068 1074 * Check whether the current thread has CPUs paused
1069 1075 */
1070 1076 int
1071 1077 cpus_paused(void)
1072 1078 {
1073 1079 if (cpu_pause_info.cp_paused != NULL) {
1074 1080 ASSERT(cpu_pause_info.cp_paused == curthread);
1075 1081 return (1);
1076 1082 }
1077 1083 return (0);
1078 1084 }
1079 1085
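
A sketch of the usual pause/resume bracket, as cpu_online() and cpu_offline() below do around their list updates; update_cpu_lists() is a hypothetical mutation that must follow the restrictions noted above pause_cpus(), i.e. no adaptive locks and no blocking while CPUs are paused:

/*
 * Sketch: modify CPU bookkeeping with every other CPU parked.
 * update_cpu_lists() is a hypothetical helper.
 */
static void
pause_bracket_example(void)
{
	mutex_enter(&cpu_lock);
	pause_cpus(NULL, NULL);		/* park every other CPU */
	update_cpu_lists();		/* safe: nothing else is running */
	start_cpus();
	mutex_exit(&cpu_lock);
}
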
1080 1086 static cpu_t *
1081 1087 cpu_get_all(processorid_t cpun)
1082 1088 {
1083 1089 ASSERT(MUTEX_HELD(&cpu_lock));
1084 1090
1085 1091 if (cpun >= NCPU || cpun < 0 || !CPU_IN_SET(cpu_available, cpun))
1086 1092 return (NULL);
1087 1093 return (cpu[cpun]);
1088 1094 }
1089 1095
1090 1096 /*
1091 1097 * Check whether cpun is a valid processor id and whether it should be
1092 1098 * visible from the current zone. If it is, return a pointer to the
1093 1099 * associated CPU structure.
1094 1100 */
1095 1101 cpu_t *
1096 1102 cpu_get(processorid_t cpun)
1097 1103 {
1098 1104 cpu_t *c;
1099 1105
1100 1106 ASSERT(MUTEX_HELD(&cpu_lock));
1101 1107 c = cpu_get_all(cpun);
1102 1108 if (c != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() &&
1103 1109 zone_pset_get(curproc->p_zone) != cpupart_query_cpu(c))
1104 1110 return (NULL);
1105 1111 return (c);
1106 1112 }
1107 1113
1108 1114 /*
1109 1115 * The following functions should be used to check CPU states in the kernel.
1110 1116 * They should be invoked with cpu_lock held. Kernel subsystems interested
1111 1117 * in CPU states should *not* use cpu_get_state() and various P_ONLINE/etc
1112 1118 * states. Those are for user-land (and system call) use only.
1113 1119 */
1114 1120
1115 1121 /*
1116 1122 * Determine whether the CPU is online and handling interrupts.
1117 1123 */
1118 1124 int
1119 1125 cpu_is_online(cpu_t *cpu)
1120 1126 {
1121 1127 ASSERT(MUTEX_HELD(&cpu_lock));
1122 1128 return (cpu_flagged_online(cpu->cpu_flags));
1123 1129 }
1124 1130
1125 1131 /*
1126 1132 * Determine whether the CPU is offline (this includes spare and faulted).
1127 1133 */
1128 1134 int
1129 1135 cpu_is_offline(cpu_t *cpu)
1130 1136 {
1131 1137 ASSERT(MUTEX_HELD(&cpu_lock));
1132 1138 return (cpu_flagged_offline(cpu->cpu_flags));
1133 1139 }
1134 1140
1135 1141 /*
1136 1142 * Determine whether the CPU is powered off.
1137 1143 */
1138 1144 int
1139 1145 cpu_is_poweredoff(cpu_t *cpu)
1140 1146 {
1141 1147 ASSERT(MUTEX_HELD(&cpu_lock));
1142 1148 return (cpu_flagged_poweredoff(cpu->cpu_flags));
1143 1149 }
1144 1150
1145 1151 /*
1146 1152 * Determine whether the CPU is handling interrupts.
1147 1153 */
1148 1154 int
1149 1155 cpu_is_nointr(cpu_t *cpu)
1150 1156 {
1151 1157 ASSERT(MUTEX_HELD(&cpu_lock));
1152 1158 return (cpu_flagged_nointr(cpu->cpu_flags));
1153 1159 }
1154 1160
1155 1161 /*
1156 1162 * Determine whether the CPU is active (scheduling threads).
1157 1163 */
1158 1164 int
1159 1165 cpu_is_active(cpu_t *cpu)
1160 1166 {
1161 1167 ASSERT(MUTEX_HELD(&cpu_lock));
1162 1168 return (cpu_flagged_active(cpu->cpu_flags));
1163 1169 }
1164 1170
1165 1171 /*
1166 1172 * Same as above, but these require cpu_flags instead of cpu_t pointers.
1167 1173 */
1168 1174 int
1169 1175 cpu_flagged_online(cpu_flag_t cpu_flags)
1170 1176 {
1171 1177 return (cpu_flagged_active(cpu_flags) &&
1172 1178 (cpu_flags & CPU_ENABLE));
1173 1179 }
1174 1180
1175 1181 int
1176 1182 cpu_flagged_offline(cpu_flag_t cpu_flags)
1177 1183 {
1178 1184 return (((cpu_flags & CPU_POWEROFF) == 0) &&
1179 1185 ((cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY));
1180 1186 }
1181 1187
1182 1188 int
1183 1189 cpu_flagged_poweredoff(cpu_flag_t cpu_flags)
1184 1190 {
1185 1191 return ((cpu_flags & CPU_POWEROFF) == CPU_POWEROFF);
1186 1192 }
1187 1193
1188 1194 int
1189 1195 cpu_flagged_nointr(cpu_flag_t cpu_flags)
1190 1196 {
1191 1197 return (cpu_flagged_active(cpu_flags) &&
1192 1198 (cpu_flags & CPU_ENABLE) == 0);
1193 1199 }
1194 1200
1195 1201 int
1196 1202 cpu_flagged_active(cpu_flag_t cpu_flags)
1197 1203 {
1198 1204 return (((cpu_flags & (CPU_POWEROFF | CPU_FAULTED | CPU_SPARE)) == 0) &&
1199 1205 ((cpu_flags & (CPU_READY | CPU_OFFLINE)) == CPU_READY));
1200 1206 }
1201 1207
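
As a small illustration of the kernel-internal state checks above, a hypothetical caller holds cpu_lock (as the comment preceding these functions requires) and walks the CPU list:

/*
 * Sketch: count online CPUs using the kernel-internal checks above
 * rather than the userland P_ONLINE values.  Hypothetical helper.
 */
static int
count_online_cpus(void)
{
	cpu_t *cp;
	int n = 0;

	mutex_enter(&cpu_lock);
	cp = cpu_list;
	do {
		if (cpu_is_online(cp))
			n++;
		cp = cp->cpu_next;
	} while (cp != cpu_list);
	mutex_exit(&cpu_lock);
	return (n);
}
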
1202 1208 /*
1203 1209 * Bring the indicated CPU online.
1204 1210 */
1205 1211 int
1206 1212 cpu_online(cpu_t *cp)
1207 1213 {
1208 1214 int error = 0;
1209 1215
1210 1216 /*
1211 1217 * Handle on-line request.
1212 1218 * This code must put the new CPU on the active list before
1213 1219 * starting it because it will not be paused, and will start
1214 1220 * using the active list immediately. The real start occurs
1215 1221 * when the CPU_QUIESCED flag is turned off.
1216 1222 */
1217 1223
1218 1224 ASSERT(MUTEX_HELD(&cpu_lock));
1219 1225
1220 1226 /*
1221 1227 * Put all the cpus into a known safe place.
1222 1228 * No mutexes can be entered while CPUs are paused.
1223 1229 */
1224 1230 error = mp_cpu_start(cp); /* arch-dep hook */
1225 1231 if (error == 0) {
1226 1232 pg_cpupart_in(cp, cp->cpu_part);
1227 1233 pause_cpus(NULL, NULL);
1228 1234 cpu_add_active_internal(cp);
1229 1235 if (cp->cpu_flags & CPU_FAULTED) {
1230 1236 cp->cpu_flags &= ~CPU_FAULTED;
1231 1237 mp_cpu_faulted_exit(cp);
1232 1238 }
1233 1239 cp->cpu_flags &= ~(CPU_QUIESCED | CPU_OFFLINE | CPU_FROZEN |
1234 1240 CPU_SPARE);
1235 1241 CPU_NEW_GENERATION(cp);
1236 1242 start_cpus();
1237 1243 cpu_stats_kstat_create(cp);
1238 1244 cpu_create_intrstat(cp);
1239 1245 lgrp_kstat_create(cp);
1240 1246 cpu_state_change_notify(cp->cpu_id, CPU_ON);
1241 1247 cpu_intr_enable(cp); /* arch-dep hook */
1242 1248 cpu_state_change_notify(cp->cpu_id, CPU_INTR_ON);
1243 1249 cpu_set_state(cp);
1244 1250 cyclic_online(cp);
1245 1251 /*
1246 1252 * This has to be called only after cyclic_online(). This
1247 1253 * function uses cyclics.
1248 1254 */
1249 1255 callout_cpu_online(cp);
1250 1256 poke_cpu(cp->cpu_id);
1251 1257 }
1252 1258
1253 1259 return (error);
1254 1260 }
1255 1261
1256 1262 /*
1257 1263 * Take the indicated CPU offline.
1258 1264 */
1259 1265 int
1260 1266 cpu_offline(cpu_t *cp, int flags)
1261 1267 {
1262 1268 cpupart_t *pp;
1263 1269 int error = 0;
1264 1270 cpu_t *ncp;
1265 1271 int intr_enable;
1266 1272 int cyclic_off = 0;
1267 1273 int callout_off = 0;
1268 1274 int loop_count;
1269 1275 int no_quiesce = 0;
1270 1276 int (*bound_func)(struct cpu *, int);
1271 1277 kthread_t *t;
1272 1278 lpl_t *cpu_lpl;
1273 1279 proc_t *p;
1274 1280 int lgrp_diff_lpl;
1275 1281 boolean_t unbind_all_threads = (flags & CPU_FORCED) != 0;
1276 1282
1277 1283 ASSERT(MUTEX_HELD(&cpu_lock));
1278 1284
1279 1285 /*
1280 1286 * If we're going from faulted or spare to offline, just
1281 1287 * clear these flags and update CPU state.
1282 1288 */
1283 1289 if (cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) {
1284 1290 if (cp->cpu_flags & CPU_FAULTED) {
1285 1291 cp->cpu_flags &= ~CPU_FAULTED;
1286 1292 mp_cpu_faulted_exit(cp);
1287 1293 }
1288 1294 cp->cpu_flags &= ~CPU_SPARE;
1289 1295 cpu_set_state(cp);
1290 1296 return (0);
1291 1297 }
1292 1298
1293 1299 /*
1294 1300 * Handle off-line request.
1295 1301 */
1296 1302 pp = cp->cpu_part;
1297 1303 /*
1298 1304 * Don't offline last online CPU in partition
1299 1305 */
1300 1306 if (ncpus_online <= 1 || pp->cp_ncpus <= 1 || cpu_intr_count(cp) < 2)
1301 1307 return (EBUSY);
1302 1308 /*
1303 1309 * Unbind all soft-bound threads bound to our CPU and hard bound threads
1304 1310 * if we were asked to.
1305 1311 */
1306 1312 error = cpu_unbind(cp->cpu_id, unbind_all_threads);
1307 1313 if (error != 0)
1308 1314 return (error);
1309 1315 /*
1310 1316 * We shouldn't be bound to this CPU ourselves.
1311 1317 */
1312 1318 if (curthread->t_bound_cpu == cp)
1313 1319 return (EBUSY);
1314 1320
1315 1321 /*
1316 1322 * Tell interested parties that this CPU is going offline.
1317 1323 */
1318 1324 CPU_NEW_GENERATION(cp);
1319 1325 cpu_state_change_notify(cp->cpu_id, CPU_OFF);
1320 1326
1321 1327 /*
1322 1328 * Tell the PG subsystem that the CPU is leaving the partition
1323 1329 */
1324 1330 pg_cpupart_out(cp, pp);
1325 1331
1326 1332 /*
1327 1333 * Take the CPU out of interrupt participation so we won't find
1328 1334 * bound kernel threads. If the architecture cannot completely
1329 1335 * shut off interrupts on the CPU, don't quiesce it, but don't
1330 1336 * run anything but interrupt thread... this is indicated by
1331 1337  * the CPU_OFFLINE flag being on but the CPU_QUIESCED flag being
1332 1338 * off.
1333 1339 */
1334 1340 intr_enable = cp->cpu_flags & CPU_ENABLE;
1335 1341 if (intr_enable)
1336 1342 no_quiesce = cpu_intr_disable(cp);
1337 1343
1338 1344 /*
1339 1345 * Record that we are aiming to offline this cpu. This acts as
1340 1346 * a barrier to further weak binding requests in thread_nomigrate
1341 1347 * and also causes cpu_choose, disp_lowpri_cpu and setfrontdq to
1342 1348 * lean away from this cpu. Further strong bindings are already
1343 1349 * avoided since we hold cpu_lock. Since threads that are set
1344 1350 * runnable around now and others coming off the target cpu are
1345 1351 * directed away from the target, existing strong and weak bindings
1346 1352 * (especially the latter) to the target cpu stand maximum chance of
1347 1353 * being able to unbind during the short delay loop below (if other
1348 1354 * unbound threads compete they may not see cpu in time to unbind
1349 1355  * even if they would do so immediately).
1350 1356 */
1351 1357 cpu_inmotion = cp;
1352 1358 membar_enter();
1353 1359
1354 1360 /*
1355 1361 * Check for kernel threads (strong or weak) bound to that CPU.
1356 1362 * Strongly bound threads may not unbind, and we'll have to return
1357 1363 * EBUSY. Weakly bound threads should always disappear - we've
1358 1364 * stopped more weak binding with cpu_inmotion and existing
1359 1365 * bindings will drain imminently (they may not block). Nonetheless
1360 1366 * we will wait for a fixed period for all bound threads to disappear.
1361 1367 * Inactive interrupt threads are OK (they'll be in TS_FREE
1362 1368 * state). If test finds some bound threads, wait a few ticks
1363 1369 * to give short-lived threads (such as interrupts) chance to
1364 1370 * complete. Note that if no_quiesce is set, i.e. this cpu
1365 1371 * is required to service interrupts, then we take the route
1366 1372 * that permits interrupt threads to be active (or bypassed).
1367 1373 */
1368 1374 bound_func = no_quiesce ? disp_bound_threads : disp_bound_anythreads;
1369 1375
1370 1376 again: for (loop_count = 0; (*bound_func)(cp, 0); loop_count++) {
1371 1377 if (loop_count >= 5) {
1372 1378 error = EBUSY; /* some threads still bound */
1373 1379 break;
1374 1380 }
1375 1381
1376 1382 /*
1377 1383 * If some threads were assigned, give them
1378 1384 * a chance to complete or move.
1379 1385 *
1380 1386 * This assumes that the clock_thread is not bound
1381 1387 * to any CPU, because the clock_thread is needed to
1382 1388 * do the delay(hz/100).
1383 1389 *
1384 1390 * Note: we still hold the cpu_lock while waiting for
1385 1391 * the next clock tick. This is OK since it isn't
1386 1392 * needed for anything else except processor_bind(2),
1387 1393 * and system initialization. If we drop the lock,
1388 1394 * we would risk another p_online disabling the last
1389 1395 * processor.
1390 1396 */
1391 1397 delay(hz/100);
1392 1398 }
1393 1399
1394 1400 if (error == 0 && callout_off == 0) {
1395 1401 callout_cpu_offline(cp);
1396 1402 callout_off = 1;
1397 1403 }
1398 1404
1399 1405 if (error == 0 && cyclic_off == 0) {
1400 1406 if (!cyclic_offline(cp)) {
1401 1407 /*
1402 1408 * We must have bound cyclics...
1403 1409 */
1404 1410 error = EBUSY;
1405 1411 goto out;
1406 1412 }
1407 1413 cyclic_off = 1;
1408 1414 }
1409 1415
1410 1416 /*
1411 1417 * Call mp_cpu_stop() to perform any special operations
1412 1418 * needed for this machine architecture to offline a CPU.
1413 1419 */
1414 1420 if (error == 0)
1415 1421 error = mp_cpu_stop(cp); /* arch-dep hook */
1416 1422
1417 1423 /*
1418 1424 * If that all worked, take the CPU offline and decrement
1419 1425 * ncpus_online.
1420 1426 */
1421 1427 if (error == 0) {
1422 1428 /*
1423 1429 * Put all the cpus into a known safe place.
1424 1430 * No mutexes can be entered while CPUs are paused.
1425 1431 */
1426 1432 pause_cpus(cp, NULL);
1427 1433 /*
1428 1434 * Repeat the operation, if necessary, to make sure that
1429 1435 * all outstanding low-level interrupts run to completion
1430 1436 * before we set the CPU_QUIESCED flag. It's also possible
1431 1437 * that a thread has weak bound to the cpu despite our raising
1432 1438 * cpu_inmotion above since it may have loaded that
1433 1439 * value before the barrier became visible (this would have
1434 1440 * to be the thread that was on the target cpu at the time
1435 1441 * we raised the barrier).
1436 1442 */
1437 1443 if ((!no_quiesce && cp->cpu_intr_actv != 0) ||
1438 1444 (*bound_func)(cp, 1)) {
1439 1445 start_cpus();
1440 1446 (void) mp_cpu_start(cp);
1441 1447 goto again;
1442 1448 }
1443 1449 ncp = cp->cpu_next_part;
1444 1450 cpu_lpl = cp->cpu_lpl;
1445 1451 ASSERT(cpu_lpl != NULL);
1446 1452
1447 1453 /*
1448 1454 * Remove the CPU from the list of active CPUs.
1449 1455 */
1450 1456 cpu_remove_active(cp);
1451 1457
1452 1458 /*
1453 1459 * Walk the active process list and look for threads
1454 1460 * whose home lgroup needs to be updated, or
1455 1461 * the last CPU they run on is the one being offlined now.
1456 1462 */
1457 1463
1458 1464 ASSERT(curthread->t_cpu != cp);
1459 1465 for (p = practive; p != NULL; p = p->p_next) {
1460 1466
1461 1467 t = p->p_tlist;
1462 1468
1463 1469 if (t == NULL)
1464 1470 continue;
1465 1471
1466 1472 lgrp_diff_lpl = 0;
1467 1473
1468 1474 do {
1469 1475 ASSERT(t->t_lpl != NULL);
1470 1476 /*
1471 1477 * Taking last CPU in lpl offline
1472 1478 * Rehome thread if it is in this lpl
1473 1479 * Otherwise, update the count of how many
1474 1480 * threads are in this CPU's lgroup but have
1475 1481 * a different lpl.
1476 1482 */
1477 1483
1478 1484 if (cpu_lpl->lpl_ncpu == 0) {
1479 1485 if (t->t_lpl == cpu_lpl)
1480 1486 lgrp_move_thread(t,
1481 1487 lgrp_choose(t,
1482 1488 t->t_cpupart), 0);
1483 1489 else if (t->t_lpl->lpl_lgrpid ==
1484 1490 cpu_lpl->lpl_lgrpid)
1485 1491 lgrp_diff_lpl++;
1486 1492 }
1487 1493 ASSERT(t->t_lpl->lpl_ncpu > 0);
1488 1494
1489 1495 /*
1490 1496 * Update CPU last ran on if it was this CPU
1491 1497 */
1492 1498 if (t->t_cpu == cp && t->t_bound_cpu != cp)
1493 - t->t_cpu = disp_lowpri_cpu(ncp,
1494 - t->t_lpl, t->t_pri, NULL);
1499 + t->t_cpu = disp_lowpri_cpu(ncp, t,
1500 + t->t_pri);
1495 1501 ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp ||
1496 1502 t->t_weakbound_cpu == cp);
1497 1503
1498 1504 t = t->t_forw;
1499 1505 } while (t != p->p_tlist);
1500 1506
1501 1507 /*
1502 1508 * Didn't find any threads in the same lgroup as this
1503 1509 * CPU with a different lpl, so remove the lgroup from
1504 1510 * the process lgroup bitmask.
1505 1511 */
1506 1512
1507 1513 if (lgrp_diff_lpl == 0)
1508 1514 klgrpset_del(p->p_lgrpset, cpu_lpl->lpl_lgrpid);
1509 1515 }
1510 1516
1511 1517 /*
1512 1518 * Walk thread list looking for threads that need to be
1513 1519 * rehomed, since there are some threads that are not in
1514 1520 * their process's p_tlist.
1515 1521 */
1516 1522
1517 1523 t = curthread;
1518 1524 do {
1519 1525 ASSERT(t != NULL && t->t_lpl != NULL);
1520 1526
1521 1527 /*
1522 1528 * Rehome threads with same lpl as this CPU when this
1523 1529 * is the last CPU in the lpl.
1524 1530 */
1525 1531
1526 1532 if ((cpu_lpl->lpl_ncpu == 0) && (t->t_lpl == cpu_lpl))
1527 1533 lgrp_move_thread(t,
1528 1534 lgrp_choose(t, t->t_cpupart), 1);
1529 1535
1530 1536 ASSERT(t->t_lpl->lpl_ncpu > 0);
1531 1537
1532 1538 /*
1533 1539 * Update CPU last ran on if it was this CPU
1534 1540 */
1535 1541
1536 - if (t->t_cpu == cp && t->t_bound_cpu != cp) {
1537 - t->t_cpu = disp_lowpri_cpu(ncp,
1538 - t->t_lpl, t->t_pri, NULL);
1539 - }
1542 + if (t->t_cpu == cp && t->t_bound_cpu != cp)
1543 + t->t_cpu = disp_lowpri_cpu(ncp, t, t->t_pri);
1544 +
1540 1545 ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp ||
1541 1546 t->t_weakbound_cpu == cp);
1542 1547 t = t->t_next;
1543 1548
1544 1549 } while (t != curthread);
1545 1550 ASSERT((cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) == 0);
1546 1551 cp->cpu_flags |= CPU_OFFLINE;
1547 1552 disp_cpu_inactive(cp);
1548 1553 if (!no_quiesce)
1549 1554 cp->cpu_flags |= CPU_QUIESCED;
1550 1555 ncpus_online--;
1551 1556 cpu_set_state(cp);
1552 1557 cpu_inmotion = NULL;
1553 1558 start_cpus();
1554 1559 cpu_stats_kstat_destroy(cp);
1555 1560 cpu_delete_intrstat(cp);
1556 1561 lgrp_kstat_destroy(cp);
1557 1562 }
1558 1563
1559 1564 out:
1560 1565 cpu_inmotion = NULL;
1561 1566
1562 1567 /*
1563 1568 * If we failed, re-enable interrupts.
1564 1569 * Do this even if cpu_intr_disable returned an error, because
1565 1570 * it may have partially disabled interrupts.
1566 1571 */
1567 1572 if (error && intr_enable)
1568 1573 cpu_intr_enable(cp);
1569 1574
1570 1575 /*
1571 1576 * If we failed, but managed to offline the cyclic subsystem on this
1572 1577 * CPU, bring it back online.
1573 1578 */
1574 1579 if (error && cyclic_off)
1575 1580 cyclic_online(cp);
1576 1581
1577 1582 /*
1578 1583 * If we failed, but managed to offline callouts on this CPU,
1579 1584 * bring it back online.
1580 1585 */
1581 1586 if (error && callout_off)
1582 1587 callout_cpu_online(cp);
1583 1588
1584 1589 /*
1585 1590 * If we failed, tell the PG subsystem that the CPU is back
1586 1591 */
1587 1592 pg_cpupart_in(cp, pp);
1588 1593
1589 1594 /*
1590 1595 * If we failed, we need to notify everyone that this CPU is back on.
1591 1596 */
1592 1597 if (error != 0) {
1593 1598 CPU_NEW_GENERATION(cp);
1594 1599 cpu_state_change_notify(cp->cpu_id, CPU_ON);
1595 1600 cpu_state_change_notify(cp->cpu_id, CPU_INTR_ON);
1596 1601 }
1597 1602
1598 1603 return (error);
1599 1604 }
1600 1605
1601 1606 /*
1602 1607 * Mark the indicated CPU as faulted, taking it offline.
1603 1608 */
1604 1609 int
1605 1610 cpu_faulted(cpu_t *cp, int flags)
1606 1611 {
1607 1612 int error = 0;
1608 1613
1609 1614 ASSERT(MUTEX_HELD(&cpu_lock));
1610 1615 ASSERT(!cpu_is_poweredoff(cp));
1611 1616
1612 1617 if (cpu_is_offline(cp)) {
1613 1618 cp->cpu_flags &= ~CPU_SPARE;
1614 1619 cp->cpu_flags |= CPU_FAULTED;
1615 1620 mp_cpu_faulted_enter(cp);
1616 1621 cpu_set_state(cp);
1617 1622 return (0);
1618 1623 }
1619 1624
1620 1625 if ((error = cpu_offline(cp, flags)) == 0) {
1621 1626 cp->cpu_flags |= CPU_FAULTED;
1622 1627 mp_cpu_faulted_enter(cp);
1623 1628 cpu_set_state(cp);
1624 1629 }
1625 1630
1626 1631 return (error);
1627 1632 }
1628 1633
1629 1634 /*
1630 1635 * Mark the indicated CPU as a spare, taking it offline.
1631 1636 */
1632 1637 int
1633 1638 cpu_spare(cpu_t *cp, int flags)
1634 1639 {
1635 1640 int error = 0;
1636 1641
1637 1642 ASSERT(MUTEX_HELD(&cpu_lock));
1638 1643 ASSERT(!cpu_is_poweredoff(cp));
1639 1644
1640 1645 if (cpu_is_offline(cp)) {
1641 1646 if (cp->cpu_flags & CPU_FAULTED) {
1642 1647 cp->cpu_flags &= ~CPU_FAULTED;
1643 1648 mp_cpu_faulted_exit(cp);
1644 1649 }
1645 1650 cp->cpu_flags |= CPU_SPARE;
1646 1651 cpu_set_state(cp);
1647 1652 return (0);
1648 1653 }
1649 1654
1650 1655 if ((error = cpu_offline(cp, flags)) == 0) {
1651 1656 cp->cpu_flags |= CPU_SPARE;
1652 1657 cpu_set_state(cp);
1653 1658 }
1654 1659
1655 1660 return (error);
1656 1661 }
1657 1662
1658 1663 /*
1659 1664 * Take the indicated CPU from poweroff to offline.
1660 1665 */
1661 1666 int
1662 1667 cpu_poweron(cpu_t *cp)
1663 1668 {
1664 1669 int error = ENOTSUP;
1665 1670
1666 1671 ASSERT(MUTEX_HELD(&cpu_lock));
1667 1672 ASSERT(cpu_is_poweredoff(cp));
1668 1673
1669 1674 error = mp_cpu_poweron(cp); /* arch-dep hook */
1670 1675 if (error == 0)
1671 1676 cpu_set_state(cp);
1672 1677
1673 1678 return (error);
1674 1679 }
1675 1680
1676 1681 /*
1677 1682 * Take the indicated CPU from any inactive state to powered off.
1678 1683 */
1679 1684 int
1680 1685 cpu_poweroff(cpu_t *cp)
1681 1686 {
1682 1687 int error = ENOTSUP;
1683 1688
1684 1689 ASSERT(MUTEX_HELD(&cpu_lock));
1685 1690 ASSERT(cpu_is_offline(cp));
1686 1691
1687 1692 if (!(cp->cpu_flags & CPU_QUIESCED))
1688 1693 return (EBUSY); /* not completely idle */
1689 1694
1690 1695 error = mp_cpu_poweroff(cp); /* arch-dep hook */
1691 1696 if (error == 0)
1692 1697 cpu_set_state(cp);
1693 1698
1694 1699 return (error);
1695 1700 }
1696 1701
1697 1702 /*
1698 1703 * Initialize the Sequential CPU id lookup table
1699 1704 */
1700 1705 void
1701 1706 cpu_seq_tbl_init()
1702 1707 {
1703 1708 cpu_t **tbl;
1704 1709
1705 1710 tbl = kmem_zalloc(sizeof (struct cpu *) * max_ncpus, KM_SLEEP);
1706 1711 tbl[0] = CPU;
1707 1712
1708 1713 cpu_seq = tbl;
1709 1714 }
1710 1715
1711 1716 /*
1712 1717 * Initialize the CPU lists for the first CPU.
1713 1718 */
1714 1719 void
1715 1720 cpu_list_init(cpu_t *cp)
1716 1721 {
1717 1722 cp->cpu_next = cp;
1718 1723 cp->cpu_prev = cp;
1719 1724 cpu_list = cp;
1720 1725 clock_cpu_list = cp;
1721 1726
1722 1727 cp->cpu_next_onln = cp;
1723 1728 cp->cpu_prev_onln = cp;
1724 1729 cpu_active = cp;
1725 1730
1726 1731 cp->cpu_seqid = 0;
1727 1732 CPUSET_ADD(cpu_seqid_inuse, 0);
1728 1733
1729 1734 /*
1730 1735 * Bootstrap cpu_seq using cpu_list
1731 1736 * The cpu_seq[] table will be dynamically allocated
1732 1737 * when kmem later becomes available (but before going MP)
1733 1738 */
1734 1739 cpu_seq = &cpu_list;
1735 1740
1736 1741 cp->cpu_cache_offset = KMEM_CPU_CACHE_OFFSET(cp->cpu_seqid);
1737 1742 cp_default.cp_cpulist = cp;
1738 1743 cp_default.cp_ncpus = 1;
1739 1744 cp->cpu_next_part = cp;
1740 1745 cp->cpu_prev_part = cp;
1741 1746 cp->cpu_part = &cp_default;
1742 1747
1743 1748 CPUSET_ADD(cpu_available, cp->cpu_id);
1744 1749 CPUSET_ADD(cpu_active_set, cp->cpu_id);
1745 1750 }
1746 1751
1747 1752 /*
1748 1753 * Insert a CPU into the list of available CPUs.
1749 1754 */
1750 1755 void
1751 1756 cpu_add_unit(cpu_t *cp)
1752 1757 {
1753 1758 int seqid;
1754 1759
1755 1760 ASSERT(MUTEX_HELD(&cpu_lock));
1756 1761 ASSERT(cpu_list != NULL); /* list started in cpu_list_init */
1757 1762
1758 1763 lgrp_config(LGRP_CONFIG_CPU_ADD, (uintptr_t)cp, 0);
1759 1764
1760 1765 /*
1761 1766 * Note: most users of the cpu_list will grab the
1762 1767 	 * cpu_lock to ensure that it isn't modified. However,
1763 1768 * certain users can't or won't do that. To allow this
1764 1769 * we pause the other cpus. Users who walk the list
1765 1770 * without cpu_lock, must disable kernel preemption
1766 1771 	 * to ensure that the list isn't modified underneath
1767 1772 * them. Also, any cached pointers to cpu structures
1768 1773 * must be revalidated by checking to see if the
1769 1774 * cpu_next pointer points to itself. This check must
1770 1775 * be done with the cpu_lock held or kernel preemption
1771 1776 * disabled. This check relies upon the fact that
1772 1777 	 * old cpu structures are not freed or cleared after
1773 1778 	 * they are removed from the cpu_list.
1774 1779 *
1775 1780 * Note that the clock code walks the cpu list dereferencing
1776 1781 * the cpu_part pointer, so we need to initialize it before
1777 1782 * adding the cpu to the list.
1778 1783 */
1779 1784 cp->cpu_part = &cp_default;
1780 1785 pause_cpus(NULL, NULL);
1781 1786 cp->cpu_next = cpu_list;
1782 1787 cp->cpu_prev = cpu_list->cpu_prev;
1783 1788 cpu_list->cpu_prev->cpu_next = cp;
1784 1789 cpu_list->cpu_prev = cp;
1785 1790 start_cpus();
1786 1791
1787 1792 for (seqid = 0; CPU_IN_SET(cpu_seqid_inuse, seqid); seqid++)
1788 1793 continue;
1789 1794 CPUSET_ADD(cpu_seqid_inuse, seqid);
1790 1795 cp->cpu_seqid = seqid;
1791 1796
1792 1797 if (seqid > max_cpu_seqid_ever)
1793 1798 max_cpu_seqid_ever = seqid;
1794 1799
1795 1800 ASSERT(ncpus < max_ncpus);
1796 1801 ncpus++;
1797 1802 cp->cpu_cache_offset = KMEM_CPU_CACHE_OFFSET(cp->cpu_seqid);
1798 1803 cpu[cp->cpu_id] = cp;
1799 1804 CPUSET_ADD(cpu_available, cp->cpu_id);
1800 1805 cpu_seq[cp->cpu_seqid] = cp;
1801 1806
1802 1807 /*
1803 1808 * allocate a pause thread for this CPU.
1804 1809 */
1805 1810 cpu_pause_alloc(cp);
1806 1811
1807 1812 /*
1808 1813 * So that new CPUs won't have NULL prev_onln and next_onln pointers,
1809 1814 * link them into a list of just that CPU.
1810 1815 * This is so that disp_lowpri_cpu will work for thread_create in
1811 1816 * pause_cpus() when called from the startup thread in a new CPU.
1812 1817 */
1813 1818 cp->cpu_next_onln = cp;
1814 1819 cp->cpu_prev_onln = cp;
1815 1820 cpu_info_kstat_create(cp);
1816 1821 cp->cpu_next_part = cp;
1817 1822 cp->cpu_prev_part = cp;
1818 1823
1819 1824 init_cpu_mstate(cp, CMS_SYSTEM);
1820 1825
1821 1826 pool_pset_mod = gethrtime();
1822 1827 }
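A minimal sketch, for illustration, of the locking discipline the note above describes: a caller that walks cpu_list without holding cpu_lock disables kernel preemption around the traversal. The helper name below is hypothetical.

/*
 * Hypothetical example: count active CPUs by walking the circular
 * cpu_list with kernel preemption disabled, per the note above.
 */
static int
count_active_cpus_sketch(void)
{
	cpu_t *cp;
	int n = 0;

	kpreempt_disable();	/* cpu_list cannot change underneath us */
	cp = cpu_list;
	do {
		if (cpu_is_active(cp))
			n++;
		cp = cp->cpu_next;
	} while (cp != cpu_list);
	kpreempt_enable();

	return (n);
}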
1823 1828
1824 1829 /*
1825 1830 * Do the opposite of cpu_add_unit().
1826 1831 */
1827 1832 void
1828 1833 cpu_del_unit(int cpuid)
1829 1834 {
1830 1835 struct cpu *cp, *cpnext;
1831 1836
1832 1837 ASSERT(MUTEX_HELD(&cpu_lock));
1833 1838 cp = cpu[cpuid];
1834 1839 ASSERT(cp != NULL);
1835 1840
1836 1841 ASSERT(cp->cpu_next_onln == cp);
1837 1842 ASSERT(cp->cpu_prev_onln == cp);
1838 1843 ASSERT(cp->cpu_next_part == cp);
1839 1844 ASSERT(cp->cpu_prev_part == cp);
1840 1845
1841 1846 /*
1842 1847 * Tear down the CPU's physical ID cache, and update any
1843 1848 * processor groups
1844 1849 */
1845 1850 pg_cpu_fini(cp, NULL);
1846 1851 pghw_physid_destroy(cp);
1847 1852
1848 1853 /*
1849 1854 * Destroy kstat stuff.
1850 1855 */
1851 1856 cpu_info_kstat_destroy(cp);
1852 1857 term_cpu_mstate(cp);
1853 1858 /*
1854 1859 * Free up pause thread.
1855 1860 */
1856 1861 cpu_pause_free(cp);
1857 1862 CPUSET_DEL(cpu_available, cp->cpu_id);
1858 1863 cpu[cp->cpu_id] = NULL;
1859 1864 cpu_seq[cp->cpu_seqid] = NULL;
1860 1865
1861 1866 /*
1862 1867 * The clock thread and mutex_vector_enter cannot hold the
1863 1868 	 * cpu_lock while traversing the cpu list; therefore we pause
1864 1869 * all other threads by pausing the other cpus. These, and any
1865 1870 * other routines holding cpu pointers while possibly sleeping
1866 1871 * must be sure to call kpreempt_disable before processing the
1867 1872 * list and be sure to check that the cpu has not been deleted
1868 1873 * after any sleeps (check cp->cpu_next != NULL). We guarantee
1869 1874 * to keep the deleted cpu structure around.
1870 1875 *
1871 1876 * Note that this MUST be done AFTER cpu_available
1872 1877 * has been updated so that we don't waste time
1873 1878 * trying to pause the cpu we're trying to delete.
1874 1879 */
1875 1880 pause_cpus(NULL, NULL);
1876 1881
1877 1882 cpnext = cp->cpu_next;
1878 1883 cp->cpu_prev->cpu_next = cp->cpu_next;
1879 1884 cp->cpu_next->cpu_prev = cp->cpu_prev;
1880 1885 if (cp == cpu_list)
1881 1886 cpu_list = cpnext;
1882 1887
1883 1888 /*
1884 1889 * Signals that the cpu has been deleted (see above).
1885 1890 */
1886 1891 cp->cpu_next = NULL;
1887 1892 cp->cpu_prev = NULL;
1888 1893
1889 1894 start_cpus();
1890 1895
1891 1896 CPUSET_DEL(cpu_seqid_inuse, cp->cpu_seqid);
1892 1897 ncpus--;
1893 1898 lgrp_config(LGRP_CONFIG_CPU_DEL, (uintptr_t)cp, 0);
1894 1899
1895 1900 pool_pset_mod = gethrtime();
1896 1901 }
1897 1902
1898 1903 /*
1899 1904 * Add a CPU to the list of active CPUs.
1900 1905 * This routine must not get any locks, because other CPUs are paused.
1901 1906 */
1902 1907 static void
1903 1908 cpu_add_active_internal(cpu_t *cp)
1904 1909 {
1905 1910 cpupart_t *pp = cp->cpu_part;
1906 1911
1907 1912 ASSERT(MUTEX_HELD(&cpu_lock));
1908 1913 ASSERT(cpu_list != NULL); /* list started in cpu_list_init */
1909 1914
1910 1915 ncpus_online++;
1911 1916 cpu_set_state(cp);
1912 1917 cp->cpu_next_onln = cpu_active;
1913 1918 cp->cpu_prev_onln = cpu_active->cpu_prev_onln;
1914 1919 cpu_active->cpu_prev_onln->cpu_next_onln = cp;
1915 1920 cpu_active->cpu_prev_onln = cp;
1916 1921 CPUSET_ADD(cpu_active_set, cp->cpu_id);
1917 1922
1918 1923 if (pp->cp_cpulist) {
1919 1924 cp->cpu_next_part = pp->cp_cpulist;
1920 1925 cp->cpu_prev_part = pp->cp_cpulist->cpu_prev_part;
1921 1926 pp->cp_cpulist->cpu_prev_part->cpu_next_part = cp;
1922 1927 pp->cp_cpulist->cpu_prev_part = cp;
1923 1928 } else {
1924 1929 ASSERT(pp->cp_ncpus == 0);
1925 1930 pp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp;
1926 1931 }
1927 1932 pp->cp_ncpus++;
1928 1933 if (pp->cp_ncpus == 1) {
1929 1934 cp_numparts_nonempty++;
1930 1935 ASSERT(cp_numparts_nonempty != 0);
1931 1936 }
1932 1937
1933 1938 pg_cpu_active(cp);
1934 1939 lgrp_config(LGRP_CONFIG_CPU_ONLINE, (uintptr_t)cp, 0);
1935 1940
1936 1941 bzero(&cp->cpu_loadavg, sizeof (cp->cpu_loadavg));
1937 1942 }
1938 1943
1939 1944 /*
1940 1945 * Add a CPU to the list of active CPUs.
1941 1946 * This is called from machine-dependent layers when a new CPU is started.
1942 1947 */
1943 1948 void
1944 1949 cpu_add_active(cpu_t *cp)
1945 1950 {
1946 1951 pg_cpupart_in(cp, cp->cpu_part);
1947 1952
1948 1953 pause_cpus(NULL, NULL);
1949 1954 cpu_add_active_internal(cp);
1950 1955 start_cpus();
1951 1956
1952 1957 cpu_stats_kstat_create(cp);
1953 1958 cpu_create_intrstat(cp);
1954 1959 lgrp_kstat_create(cp);
1955 1960 cpu_state_change_notify(cp->cpu_id, CPU_INIT);
1956 1961 }
1957 1962
1958 1963
1959 1964 /*
1960 1965 * Remove a CPU from the list of active CPUs.
1961 1966 * This routine must not get any locks, because other CPUs are paused.
1962 1967 */
1963 1968 /* ARGSUSED */
1964 1969 static void
1965 1970 cpu_remove_active(cpu_t *cp)
1966 1971 {
1967 1972 cpupart_t *pp = cp->cpu_part;
1968 1973
1969 1974 ASSERT(MUTEX_HELD(&cpu_lock));
1970 1975 ASSERT(cp->cpu_next_onln != cp); /* not the last one */
1971 1976 ASSERT(cp->cpu_prev_onln != cp); /* not the last one */
1972 1977
1973 1978 pg_cpu_inactive(cp);
1974 1979
1975 1980 lgrp_config(LGRP_CONFIG_CPU_OFFLINE, (uintptr_t)cp, 0);
1976 1981
1977 1982 if (cp == clock_cpu_list)
1978 1983 clock_cpu_list = cp->cpu_next_onln;
1979 1984
1980 1985 cp->cpu_prev_onln->cpu_next_onln = cp->cpu_next_onln;
1981 1986 cp->cpu_next_onln->cpu_prev_onln = cp->cpu_prev_onln;
1982 1987 if (cpu_active == cp) {
1983 1988 cpu_active = cp->cpu_next_onln;
1984 1989 }
1985 1990 cp->cpu_next_onln = cp;
1986 1991 cp->cpu_prev_onln = cp;
1987 1992 CPUSET_DEL(cpu_active_set, cp->cpu_id);
1988 1993
1989 1994 cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part;
1990 1995 cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part;
1991 1996 if (pp->cp_cpulist == cp) {
1992 1997 pp->cp_cpulist = cp->cpu_next_part;
1993 1998 ASSERT(pp->cp_cpulist != cp);
1994 1999 }
1995 2000 cp->cpu_next_part = cp;
1996 2001 cp->cpu_prev_part = cp;
1997 2002 pp->cp_ncpus--;
1998 2003 if (pp->cp_ncpus == 0) {
1999 2004 cp_numparts_nonempty--;
2000 2005 ASSERT(cp_numparts_nonempty != 0);
2001 2006 }
2002 2007 }
2003 2008
2004 2009 /*
2005 2010  * Routine used to set up a newly inserted CPU in preparation for starting
2006 2011 * it running code.
2007 2012 */
2008 2013 int
2009 2014 cpu_configure(int cpuid)
2010 2015 {
2011 2016 int retval = 0;
2012 2017
2013 2018 ASSERT(MUTEX_HELD(&cpu_lock));
2014 2019
2015 2020 /*
2016 2021 * Some structures are statically allocated based upon
2017 2022 * the maximum number of cpus the system supports. Do not
2018 2023 * try to add anything beyond this limit.
2019 2024 */
2020 2025 if (cpuid < 0 || cpuid >= NCPU) {
2021 2026 return (EINVAL);
2022 2027 }
2023 2028
2024 2029 if ((cpu[cpuid] != NULL) && (cpu[cpuid]->cpu_flags != 0)) {
2025 2030 return (EALREADY);
2026 2031 }
2027 2032
2028 2033 if ((retval = mp_cpu_configure(cpuid)) != 0) {
2029 2034 return (retval);
2030 2035 }
2031 2036
2032 2037 cpu[cpuid]->cpu_flags = CPU_QUIESCED | CPU_OFFLINE | CPU_POWEROFF;
2033 2038 cpu_set_state(cpu[cpuid]);
2034 2039 retval = cpu_state_change_hooks(cpuid, CPU_CONFIG, CPU_UNCONFIG);
2035 2040 if (retval != 0)
2036 2041 (void) mp_cpu_unconfigure(cpuid);
2037 2042
2038 2043 return (retval);
2039 2044 }
2040 2045
2041 2046 /*
2042 2047  * Routine used to clean up a CPU that has been powered off. This will
2043 2048 * destroy all per-cpu information related to this cpu.
2044 2049 */
2045 2050 int
2046 2051 cpu_unconfigure(int cpuid)
2047 2052 {
2048 2053 int error;
2049 2054
2050 2055 ASSERT(MUTEX_HELD(&cpu_lock));
2051 2056
2052 2057 if (cpu[cpuid] == NULL) {
2053 2058 return (ENODEV);
2054 2059 }
2055 2060
2056 2061 if (cpu[cpuid]->cpu_flags == 0) {
2057 2062 return (EALREADY);
2058 2063 }
2059 2064
2060 2065 if ((cpu[cpuid]->cpu_flags & CPU_POWEROFF) == 0) {
2061 2066 return (EBUSY);
2062 2067 }
2063 2068
2064 2069 if (cpu[cpuid]->cpu_props != NULL) {
2065 2070 (void) nvlist_free(cpu[cpuid]->cpu_props);
2066 2071 cpu[cpuid]->cpu_props = NULL;
2067 2072 }
2068 2073
2069 2074 error = cpu_state_change_hooks(cpuid, CPU_UNCONFIG, CPU_CONFIG);
2070 2075
2071 2076 if (error != 0)
2072 2077 return (error);
2073 2078
2074 2079 return (mp_cpu_unconfigure(cpuid));
2075 2080 }
2076 2081
2077 2082 /*
2078 2083 * Routines for registering and de-registering cpu_setup callback functions.
2079 2084 *
2080 2085 * Caller's context
2081 2086 * These routines must not be called from a driver's attach(9E) or
2082 2087 * detach(9E) entry point.
2083 2088 *
2084 2089 * NOTE: CPU callbacks should not block. They are called with cpu_lock held.
2085 2090 */
2086 2091
2087 2092 /*
2088 2093 * Ideally, these would be dynamically allocated and put into a linked
2089 2094 * list; however that is not feasible because the registration routine
2090 2095 * has to be available before the kmem allocator is working (in fact,
2091 2096 * it is called by the kmem allocator init code). In any case, there
2092 2097 * are quite a few extra entries for future users.
2093 2098 */
2094 2099 #define NCPU_SETUPS 20
2095 2100
2096 2101 struct cpu_setup {
2097 2102 cpu_setup_func_t *func;
2098 2103 void *arg;
2099 2104 } cpu_setups[NCPU_SETUPS];
2100 2105
2101 2106 void
2102 2107 register_cpu_setup_func(cpu_setup_func_t *func, void *arg)
2103 2108 {
2104 2109 int i;
2105 2110
2106 2111 ASSERT(MUTEX_HELD(&cpu_lock));
2107 2112
2108 2113 for (i = 0; i < NCPU_SETUPS; i++)
2109 2114 if (cpu_setups[i].func == NULL)
2110 2115 break;
2111 2116 if (i >= NCPU_SETUPS)
2112 2117 cmn_err(CE_PANIC, "Ran out of cpu_setup callback entries");
2113 2118
2114 2119 cpu_setups[i].func = func;
2115 2120 cpu_setups[i].arg = arg;
2116 2121 }
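For illustration, a brief sketch of a consumer of this registration interface, assuming the standard cpu_setup_func_t signature (cpu_setup_t, int, void *); the callback name is hypothetical. Per the note above, the callback must not block and runs with cpu_lock held.

/*
 * Hypothetical callback: log selected CPU state transitions.
 * Must not block; called with cpu_lock held.
 */
static int
example_cpu_setup(cpu_setup_t what, int id, void *arg)
{
	if (what == CPU_ON || what == CPU_INIT)
		cmn_err(CE_NOTE, "cpu %d: setup event %d", id, (int)what);
	return (0);
}

	/* Registration is done with cpu_lock held. */
	mutex_enter(&cpu_lock);
	register_cpu_setup_func(example_cpu_setup, NULL);
	mutex_exit(&cpu_lock);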
2117 2122
2118 2123 void
2119 2124 unregister_cpu_setup_func(cpu_setup_func_t *func, void *arg)
2120 2125 {
2121 2126 int i;
2122 2127
2123 2128 ASSERT(MUTEX_HELD(&cpu_lock));
2124 2129
2125 2130 for (i = 0; i < NCPU_SETUPS; i++)
2126 2131 if ((cpu_setups[i].func == func) &&
2127 2132 (cpu_setups[i].arg == arg))
2128 2133 break;
2129 2134 if (i >= NCPU_SETUPS)
2130 2135 cmn_err(CE_PANIC, "Could not find cpu_setup callback to "
2131 2136 "deregister");
2132 2137
2133 2138 cpu_setups[i].func = NULL;
2134 2139 cpu_setups[i].arg = 0;
2135 2140 }
2136 2141
2137 2142 /*
2138 2143  * Call any state change hooks for this CPU, ignoring any errors.
2139 2144 */
2140 2145 void
2141 2146 cpu_state_change_notify(int id, cpu_setup_t what)
2142 2147 {
2143 2148 int i;
2144 2149
2145 2150 ASSERT(MUTEX_HELD(&cpu_lock));
2146 2151
2147 2152 for (i = 0; i < NCPU_SETUPS; i++) {
2148 2153 if (cpu_setups[i].func != NULL) {
2149 2154 cpu_setups[i].func(what, id, cpu_setups[i].arg);
2150 2155 }
2151 2156 }
2152 2157 }
2153 2158
2154 2159 /*
2155 2160  * Call any state change hooks for this CPU, undoing them on error.
2156 2161 */
2157 2162 static int
2158 2163 cpu_state_change_hooks(int id, cpu_setup_t what, cpu_setup_t undo)
2159 2164 {
2160 2165 int i;
2161 2166 int retval = 0;
2162 2167
2163 2168 ASSERT(MUTEX_HELD(&cpu_lock));
2164 2169
2165 2170 for (i = 0; i < NCPU_SETUPS; i++) {
2166 2171 if (cpu_setups[i].func != NULL) {
2167 2172 retval = cpu_setups[i].func(what, id,
2168 2173 cpu_setups[i].arg);
2169 2174 if (retval) {
2170 2175 for (i--; i >= 0; i--) {
2171 2176 if (cpu_setups[i].func != NULL)
2172 2177 cpu_setups[i].func(undo,
2173 2178 id, cpu_setups[i].arg);
2174 2179 }
2175 2180 break;
2176 2181 }
2177 2182 }
2178 2183 }
2179 2184 return (retval);
2180 2185 }
2181 2186
2182 2187 /*
2183 2188 * Export information about this CPU via the kstat mechanism.
2184 2189 */
2185 2190 static struct {
2186 2191 kstat_named_t ci_state;
2187 2192 kstat_named_t ci_state_begin;
2188 2193 kstat_named_t ci_cpu_type;
2189 2194 kstat_named_t ci_fpu_type;
2190 2195 kstat_named_t ci_clock_MHz;
2191 2196 kstat_named_t ci_chip_id;
2192 2197 kstat_named_t ci_implementation;
2193 2198 kstat_named_t ci_brandstr;
2194 2199 kstat_named_t ci_core_id;
2195 2200 kstat_named_t ci_curr_clock_Hz;
2196 2201 kstat_named_t ci_supp_freq_Hz;
2197 2202 kstat_named_t ci_pg_id;
2198 2203 #if defined(__sparcv9)
2199 2204 kstat_named_t ci_device_ID;
2200 2205 kstat_named_t ci_cpu_fru;
2201 2206 #endif
2202 2207 #if defined(__x86)
2203 2208 kstat_named_t ci_vendorstr;
2204 2209 kstat_named_t ci_family;
2205 2210 kstat_named_t ci_model;
2206 2211 kstat_named_t ci_step;
2207 2212 kstat_named_t ci_clogid;
2208 2213 kstat_named_t ci_pkg_core_id;
2209 2214 kstat_named_t ci_ncpuperchip;
2210 2215 kstat_named_t ci_ncoreperchip;
2211 2216 kstat_named_t ci_max_cstates;
2212 2217 kstat_named_t ci_curr_cstate;
2213 2218 kstat_named_t ci_cacheid;
2214 2219 kstat_named_t ci_sktstr;
2215 2220 #endif
2216 2221 } cpu_info_template = {
2217 2222 { "state", KSTAT_DATA_CHAR },
2218 2223 { "state_begin", KSTAT_DATA_LONG },
2219 2224 { "cpu_type", KSTAT_DATA_CHAR },
2220 2225 { "fpu_type", KSTAT_DATA_CHAR },
2221 2226 { "clock_MHz", KSTAT_DATA_LONG },
2222 2227 { "chip_id", KSTAT_DATA_LONG },
2223 2228 { "implementation", KSTAT_DATA_STRING },
2224 2229 { "brand", KSTAT_DATA_STRING },
2225 2230 { "core_id", KSTAT_DATA_LONG },
2226 2231 { "current_clock_Hz", KSTAT_DATA_UINT64 },
2227 2232 { "supported_frequencies_Hz", KSTAT_DATA_STRING },
2228 2233 { "pg_id", KSTAT_DATA_LONG },
2229 2234 #if defined(__sparcv9)
2230 2235 { "device_ID", KSTAT_DATA_UINT64 },
2231 2236 { "cpu_fru", KSTAT_DATA_STRING },
2232 2237 #endif
2233 2238 #if defined(__x86)
2234 2239 { "vendor_id", KSTAT_DATA_STRING },
2235 2240 { "family", KSTAT_DATA_INT32 },
2236 2241 { "model", KSTAT_DATA_INT32 },
2237 2242 { "stepping", KSTAT_DATA_INT32 },
2238 2243 { "clog_id", KSTAT_DATA_INT32 },
2239 2244 { "pkg_core_id", KSTAT_DATA_LONG },
2240 2245 { "ncpu_per_chip", KSTAT_DATA_INT32 },
2241 2246 { "ncore_per_chip", KSTAT_DATA_INT32 },
2242 2247 { "supported_max_cstates", KSTAT_DATA_INT32 },
2243 2248 { "current_cstate", KSTAT_DATA_INT32 },
2244 2249 { "cache_id", KSTAT_DATA_INT32 },
2245 2250 { "socket_type", KSTAT_DATA_STRING },
2246 2251 #endif
2247 2252 };
2248 2253
2249 2254 static kmutex_t cpu_info_template_lock;
2250 2255
2251 2256 static int
2252 2257 cpu_info_kstat_update(kstat_t *ksp, int rw)
2253 2258 {
2254 2259 cpu_t *cp = ksp->ks_private;
2255 2260 const char *pi_state;
2256 2261
2257 2262 if (rw == KSTAT_WRITE)
2258 2263 return (EACCES);
2259 2264
2260 2265 #if defined(__x86)
2261 2266 /* Is the cpu still initialising itself? */
2262 2267 if (cpuid_checkpass(cp, 1) == 0)
2263 2268 return (ENXIO);
2264 2269 #endif
2265 2270 switch (cp->cpu_type_info.pi_state) {
2266 2271 case P_ONLINE:
2267 2272 pi_state = PS_ONLINE;
2268 2273 break;
2269 2274 case P_POWEROFF:
2270 2275 pi_state = PS_POWEROFF;
2271 2276 break;
2272 2277 case P_NOINTR:
2273 2278 pi_state = PS_NOINTR;
2274 2279 break;
2275 2280 case P_FAULTED:
2276 2281 pi_state = PS_FAULTED;
2277 2282 break;
2278 2283 case P_SPARE:
2279 2284 pi_state = PS_SPARE;
2280 2285 break;
2281 2286 case P_OFFLINE:
2282 2287 pi_state = PS_OFFLINE;
2283 2288 break;
2284 2289 default:
2285 2290 pi_state = "unknown";
2286 2291 }
2287 2292 (void) strcpy(cpu_info_template.ci_state.value.c, pi_state);
2288 2293 cpu_info_template.ci_state_begin.value.l = cp->cpu_state_begin;
2289 2294 (void) strncpy(cpu_info_template.ci_cpu_type.value.c,
2290 2295 cp->cpu_type_info.pi_processor_type, 15);
2291 2296 (void) strncpy(cpu_info_template.ci_fpu_type.value.c,
2292 2297 cp->cpu_type_info.pi_fputypes, 15);
2293 2298 cpu_info_template.ci_clock_MHz.value.l = cp->cpu_type_info.pi_clock;
2294 2299 cpu_info_template.ci_chip_id.value.l =
2295 2300 pg_plat_hw_instance_id(cp, PGHW_CHIP);
2296 2301 kstat_named_setstr(&cpu_info_template.ci_implementation,
2297 2302 cp->cpu_idstr);
2298 2303 kstat_named_setstr(&cpu_info_template.ci_brandstr, cp->cpu_brandstr);
2299 2304 cpu_info_template.ci_core_id.value.l = pg_plat_get_core_id(cp);
2300 2305 cpu_info_template.ci_curr_clock_Hz.value.ui64 =
2301 2306 cp->cpu_curr_clock;
2302 2307 cpu_info_template.ci_pg_id.value.l =
2303 2308 cp->cpu_pg && cp->cpu_pg->cmt_lineage ?
2304 2309 cp->cpu_pg->cmt_lineage->pg_id : -1;
2305 2310 kstat_named_setstr(&cpu_info_template.ci_supp_freq_Hz,
2306 2311 cp->cpu_supp_freqs);
2307 2312 #if defined(__sparcv9)
2308 2313 cpu_info_template.ci_device_ID.value.ui64 =
2309 2314 cpunodes[cp->cpu_id].device_id;
2310 2315 kstat_named_setstr(&cpu_info_template.ci_cpu_fru, cpu_fru_fmri(cp));
2311 2316 #endif
2312 2317 #if defined(__x86)
2313 2318 kstat_named_setstr(&cpu_info_template.ci_vendorstr,
2314 2319 cpuid_getvendorstr(cp));
2315 2320 cpu_info_template.ci_family.value.l = cpuid_getfamily(cp);
2316 2321 cpu_info_template.ci_model.value.l = cpuid_getmodel(cp);
2317 2322 cpu_info_template.ci_step.value.l = cpuid_getstep(cp);
2318 2323 cpu_info_template.ci_clogid.value.l = cpuid_get_clogid(cp);
2319 2324 cpu_info_template.ci_ncpuperchip.value.l = cpuid_get_ncpu_per_chip(cp);
2320 2325 cpu_info_template.ci_ncoreperchip.value.l =
2321 2326 cpuid_get_ncore_per_chip(cp);
2322 2327 cpu_info_template.ci_pkg_core_id.value.l = cpuid_get_pkgcoreid(cp);
2323 2328 cpu_info_template.ci_max_cstates.value.l = cp->cpu_m.max_cstates;
2324 2329 cpu_info_template.ci_curr_cstate.value.l = cpu_idle_get_cpu_state(cp);
2325 2330 cpu_info_template.ci_cacheid.value.i32 = cpuid_get_cacheid(cp);
2326 2331 kstat_named_setstr(&cpu_info_template.ci_sktstr,
2327 2332 cpuid_getsocketstr(cp));
2328 2333 #endif
2329 2334
2330 2335 return (0);
2331 2336 }
2332 2337
2333 2338 static void
2334 2339 cpu_info_kstat_create(cpu_t *cp)
2335 2340 {
2336 2341 zoneid_t zoneid;
2337 2342
2338 2343 ASSERT(MUTEX_HELD(&cpu_lock));
2339 2344
2340 2345 if (pool_pset_enabled())
2341 2346 zoneid = GLOBAL_ZONEID;
2342 2347 else
2343 2348 zoneid = ALL_ZONES;
2344 2349 if ((cp->cpu_info_kstat = kstat_create_zone("cpu_info", cp->cpu_id,
2345 2350 NULL, "misc", KSTAT_TYPE_NAMED,
2346 2351 sizeof (cpu_info_template) / sizeof (kstat_named_t),
2347 2352 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE, zoneid)) != NULL) {
2348 2353 cp->cpu_info_kstat->ks_data_size += 2 * CPU_IDSTRLEN;
2349 2354 #if defined(__sparcv9)
2350 2355 cp->cpu_info_kstat->ks_data_size +=
2351 2356 strlen(cpu_fru_fmri(cp)) + 1;
2352 2357 #endif
2353 2358 #if defined(__x86)
2354 2359 cp->cpu_info_kstat->ks_data_size += X86_VENDOR_STRLEN;
2355 2360 #endif
2356 2361 if (cp->cpu_supp_freqs != NULL)
2357 2362 cp->cpu_info_kstat->ks_data_size +=
2358 2363 strlen(cp->cpu_supp_freqs) + 1;
2359 2364 cp->cpu_info_kstat->ks_lock = &cpu_info_template_lock;
2360 2365 cp->cpu_info_kstat->ks_data = &cpu_info_template;
2361 2366 cp->cpu_info_kstat->ks_private = cp;
2362 2367 cp->cpu_info_kstat->ks_update = cpu_info_kstat_update;
2363 2368 kstat_install(cp->cpu_info_kstat);
2364 2369 }
2365 2370 }
2366 2371
2367 2372 static void
2368 2373 cpu_info_kstat_destroy(cpu_t *cp)
2369 2374 {
2370 2375 ASSERT(MUTEX_HELD(&cpu_lock));
2371 2376
2372 2377 kstat_delete(cp->cpu_info_kstat);
2373 2378 cp->cpu_info_kstat = NULL;
2374 2379 }
2375 2380
2376 2381 /*
2377 2382 * Create and install kstats for the boot CPU.
2378 2383 */
2379 2384 void
2380 2385 cpu_kstat_init(cpu_t *cp)
2381 2386 {
2382 2387 mutex_enter(&cpu_lock);
2383 2388 cpu_info_kstat_create(cp);
2384 2389 cpu_stats_kstat_create(cp);
2385 2390 cpu_create_intrstat(cp);
2386 2391 cpu_set_state(cp);
2387 2392 mutex_exit(&cpu_lock);
2388 2393 }
2389 2394
2390 2395 /*
2391 2396 * Make visible to the zone that subset of the cpu information that would be
2392 2397 * initialized when a cpu is configured (but still offline).
2393 2398 */
2394 2399 void
2395 2400 cpu_visibility_configure(cpu_t *cp, zone_t *zone)
2396 2401 {
2397 2402 zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;
2398 2403
2399 2404 ASSERT(MUTEX_HELD(&cpu_lock));
2400 2405 ASSERT(pool_pset_enabled());
2401 2406 ASSERT(cp != NULL);
2402 2407
2403 2408 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) {
2404 2409 zone->zone_ncpus++;
2405 2410 ASSERT(zone->zone_ncpus <= ncpus);
2406 2411 }
2407 2412 if (cp->cpu_info_kstat != NULL)
2408 2413 kstat_zone_add(cp->cpu_info_kstat, zoneid);
2409 2414 }
2410 2415
2411 2416 /*
2412 2417 * Make visible to the zone that subset of the cpu information that would be
2413 2418 * initialized when a previously configured cpu is onlined.
2414 2419 */
2415 2420 void
2416 2421 cpu_visibility_online(cpu_t *cp, zone_t *zone)
2417 2422 {
2418 2423 kstat_t *ksp;
2419 2424 char name[sizeof ("cpu_stat") + 10]; /* enough for 32-bit cpuids */
2420 2425 zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;
2421 2426 processorid_t cpun;
2422 2427
2423 2428 ASSERT(MUTEX_HELD(&cpu_lock));
2424 2429 ASSERT(pool_pset_enabled());
2425 2430 ASSERT(cp != NULL);
2426 2431 ASSERT(cpu_is_active(cp));
2427 2432
2428 2433 cpun = cp->cpu_id;
2429 2434 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) {
2430 2435 zone->zone_ncpus_online++;
2431 2436 ASSERT(zone->zone_ncpus_online <= ncpus_online);
2432 2437 }
2433 2438 (void) snprintf(name, sizeof (name), "cpu_stat%d", cpun);
2434 2439 if ((ksp = kstat_hold_byname("cpu_stat", cpun, name, ALL_ZONES))
2435 2440 != NULL) {
2436 2441 kstat_zone_add(ksp, zoneid);
2437 2442 kstat_rele(ksp);
2438 2443 }
2439 2444 if ((ksp = kstat_hold_byname("cpu", cpun, "sys", ALL_ZONES)) != NULL) {
2440 2445 kstat_zone_add(ksp, zoneid);
2441 2446 kstat_rele(ksp);
2442 2447 }
2443 2448 if ((ksp = kstat_hold_byname("cpu", cpun, "vm", ALL_ZONES)) != NULL) {
2444 2449 kstat_zone_add(ksp, zoneid);
2445 2450 kstat_rele(ksp);
2446 2451 }
2447 2452 if ((ksp = kstat_hold_byname("cpu", cpun, "intrstat", ALL_ZONES)) !=
2448 2453 NULL) {
2449 2454 kstat_zone_add(ksp, zoneid);
2450 2455 kstat_rele(ksp);
2451 2456 }
2452 2457 }
2453 2458
2454 2459 /*
2455 2460 * Update relevant kstats such that cpu is now visible to processes
2456 2461 * executing in specified zone.
2457 2462 */
2458 2463 void
2459 2464 cpu_visibility_add(cpu_t *cp, zone_t *zone)
2460 2465 {
2461 2466 cpu_visibility_configure(cp, zone);
2462 2467 if (cpu_is_active(cp))
2463 2468 cpu_visibility_online(cp, zone);
2464 2469 }
2465 2470
2466 2471 /*
2467 2472 * Make invisible to the zone that subset of the cpu information that would be
2468 2473 * torn down when a previously offlined cpu is unconfigured.
2469 2474 */
2470 2475 void
2471 2476 cpu_visibility_unconfigure(cpu_t *cp, zone_t *zone)
2472 2477 {
2473 2478 zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;
2474 2479
2475 2480 ASSERT(MUTEX_HELD(&cpu_lock));
2476 2481 ASSERT(pool_pset_enabled());
2477 2482 ASSERT(cp != NULL);
2478 2483
2479 2484 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) {
2480 2485 ASSERT(zone->zone_ncpus != 0);
2481 2486 zone->zone_ncpus--;
2482 2487 }
2483 2488 if (cp->cpu_info_kstat)
2484 2489 kstat_zone_remove(cp->cpu_info_kstat, zoneid);
2485 2490 }
2486 2491
2487 2492 /*
2488 2493 * Make invisible to the zone that subset of the cpu information that would be
2489 2494 * torn down when a cpu is offlined (but still configured).
2490 2495 */
2491 2496 void
2492 2497 cpu_visibility_offline(cpu_t *cp, zone_t *zone)
2493 2498 {
2494 2499 kstat_t *ksp;
2495 2500 char name[sizeof ("cpu_stat") + 10]; /* enough for 32-bit cpuids */
2496 2501 zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;
2497 2502 processorid_t cpun;
2498 2503
2499 2504 ASSERT(MUTEX_HELD(&cpu_lock));
2500 2505 ASSERT(pool_pset_enabled());
2501 2506 ASSERT(cp != NULL);
2502 2507 ASSERT(cpu_is_active(cp));
2503 2508
2504 2509 cpun = cp->cpu_id;
2505 2510 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) {
2506 2511 ASSERT(zone->zone_ncpus_online != 0);
2507 2512 zone->zone_ncpus_online--;
2508 2513 }
2509 2514
2510 2515 if ((ksp = kstat_hold_byname("cpu", cpun, "intrstat", ALL_ZONES)) !=
2511 2516 NULL) {
2512 2517 kstat_zone_remove(ksp, zoneid);
2513 2518 kstat_rele(ksp);
2514 2519 }
2515 2520 if ((ksp = kstat_hold_byname("cpu", cpun, "vm", ALL_ZONES)) != NULL) {
2516 2521 kstat_zone_remove(ksp, zoneid);
2517 2522 kstat_rele(ksp);
2518 2523 }
2519 2524 if ((ksp = kstat_hold_byname("cpu", cpun, "sys", ALL_ZONES)) != NULL) {
2520 2525 kstat_zone_remove(ksp, zoneid);
2521 2526 kstat_rele(ksp);
2522 2527 }
2523 2528 (void) snprintf(name, sizeof (name), "cpu_stat%d", cpun);
2524 2529 if ((ksp = kstat_hold_byname("cpu_stat", cpun, name, ALL_ZONES))
2525 2530 != NULL) {
2526 2531 kstat_zone_remove(ksp, zoneid);
2527 2532 kstat_rele(ksp);
2528 2533 }
2529 2534 }
2530 2535
2531 2536 /*
2532 2537 * Update relevant kstats such that cpu is no longer visible to processes
2533 2538 * executing in specified zone.
2534 2539 */
2535 2540 void
2536 2541 cpu_visibility_remove(cpu_t *cp, zone_t *zone)
2537 2542 {
2538 2543 if (cpu_is_active(cp))
2539 2544 cpu_visibility_offline(cp, zone);
2540 2545 cpu_visibility_unconfigure(cp, zone);
2541 2546 }
2542 2547
2543 2548 /*
2544 2549 * Bind a thread to a CPU as requested.
2545 2550 */
2546 2551 int
2547 2552 cpu_bind_thread(kthread_id_t tp, processorid_t bind, processorid_t *obind,
2548 2553 int *error)
2549 2554 {
2550 2555 processorid_t binding;
2551 2556 cpu_t *cp = NULL;
2552 2557
2553 2558 ASSERT(MUTEX_HELD(&cpu_lock));
2554 2559 ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
2555 2560
2556 2561 thread_lock(tp);
2557 2562
2558 2563 /*
2559 2564 * Record old binding, but change the obind, which was initialized
2560 2565 * to PBIND_NONE, only if this thread has a binding. This avoids
2561 2566 * reporting PBIND_NONE for a process when some LWPs are bound.
2562 2567 */
2563 2568 binding = tp->t_bind_cpu;
2564 2569 if (binding != PBIND_NONE)
2565 2570 *obind = binding; /* record old binding */
2566 2571
2567 2572 switch (bind) {
2568 2573 case PBIND_QUERY:
2569 2574 /* Just return the old binding */
2570 2575 thread_unlock(tp);
2571 2576 return (0);
2572 2577
2573 2578 case PBIND_QUERY_TYPE:
2574 2579 /* Return the binding type */
2575 2580 *obind = TB_CPU_IS_SOFT(tp) ? PBIND_SOFT : PBIND_HARD;
2576 2581 thread_unlock(tp);
2577 2582 return (0);
2578 2583
2579 2584 case PBIND_SOFT:
2580 2585 /*
2581 2586 * Set soft binding for this thread and return the actual
2582 2587 * binding
2583 2588 */
2584 2589 TB_CPU_SOFT_SET(tp);
2585 2590 thread_unlock(tp);
2586 2591 return (0);
2587 2592
2588 2593 case PBIND_HARD:
2589 2594 /*
2590 2595 * Set hard binding for this thread and return the actual
2591 2596 * binding
2592 2597 */
2593 2598 TB_CPU_HARD_SET(tp);
2594 2599 thread_unlock(tp);
2595 2600 return (0);
2596 2601
2597 2602 default:
2598 2603 break;
2599 2604 }
2600 2605
2601 2606 /*
2602 2607 * If this thread/LWP cannot be bound because of permission
2603 2608 * problems, just note that and return success so that the
2604 2609 * other threads/LWPs will be bound. This is the way
2605 2610 * processor_bind() is defined to work.
2606 2611 *
2607 2612 * Binding will get EPERM if the thread is of system class
2608 2613 * or hasprocperm() fails.
2609 2614 */
2610 2615 if (tp->t_cid == 0 || !hasprocperm(tp->t_cred, CRED())) {
2611 2616 *error = EPERM;
2612 2617 thread_unlock(tp);
2613 2618 return (0);
2614 2619 }
2615 2620
2616 2621 binding = bind;
2617 2622 if (binding != PBIND_NONE) {
2618 2623 cp = cpu_get((processorid_t)binding);
2619 2624 /*
2620 2625 * Make sure binding is valid and is in right partition.
2621 2626 */
2622 2627 if (cp == NULL || tp->t_cpupart != cp->cpu_part) {
2623 2628 *error = EINVAL;
2624 2629 thread_unlock(tp);
2625 2630 return (0);
2626 2631 }
2627 2632 }
2628 2633 tp->t_bind_cpu = binding; /* set new binding */
2629 2634
2630 2635 /*
2631 2636 * If there is no system-set reason for affinity, set
2632 2637 * the t_bound_cpu field to reflect the binding.
2633 2638 */
2634 2639 if (tp->t_affinitycnt == 0) {
2635 2640 if (binding == PBIND_NONE) {
2636 2641 /*
2637 2642 * We may need to adjust disp_max_unbound_pri
2638 2643 * since we're becoming unbound.
2639 2644 */
2640 2645 disp_adjust_unbound_pri(tp);
2641 2646
2642 2647 tp->t_bound_cpu = NULL; /* set new binding */
2643 2648
2644 2649 /*
2645 2650 * Move thread to lgroup with strongest affinity
2646 2651 * after unbinding
2647 2652 */
2648 2653 if (tp->t_lgrp_affinity)
2649 2654 lgrp_move_thread(tp,
2650 2655 lgrp_choose(tp, tp->t_cpupart), 1);
2651 2656
2652 2657 if (tp->t_state == TS_ONPROC &&
2653 2658 tp->t_cpu->cpu_part != tp->t_cpupart)
2654 2659 cpu_surrender(tp);
2655 2660 } else {
2656 2661 lpl_t *lpl;
2657 2662
2658 2663 tp->t_bound_cpu = cp;
2659 2664 ASSERT(cp->cpu_lpl != NULL);
2660 2665
2661 2666 /*
2662 2667 			 * Set home to the lgroup with the most affinity that
2663 2668 			 * contains the CPU the thread is being bound to, or to
2664 2669 			 * the minimum bounding lgroup if no affinities are set.
2665 2670 */
2666 2671 if (tp->t_lgrp_affinity)
2667 2672 lpl = lgrp_affinity_best(tp, tp->t_cpupart,
2668 2673 LGRP_NONE, B_FALSE);
2669 2674 else
2670 2675 lpl = cp->cpu_lpl;
2671 2676
2672 2677 if (tp->t_lpl != lpl) {
2673 2678 /* can't grab cpu_lock */
2674 2679 lgrp_move_thread(tp, lpl, 1);
2675 2680 }
2676 2681
2677 2682 /*
2678 2683 * Make the thread switch to the bound CPU.
2679 2684 * If the thread is runnable, we need to
2680 2685 * requeue it even if t_cpu is already set
2681 2686 * to the right CPU, since it may be on a
2682 2687 * kpreempt queue and need to move to a local
2683 2688 * queue. We could check t_disp_queue to
2684 2689 * avoid unnecessary overhead if it's already
2685 2690 * on the right queue, but since this isn't
2686 2691 * a performance-critical operation it doesn't
2687 2692 * seem worth the extra code and complexity.
2688 2693 *
2689 2694 * If the thread is weakbound to the cpu then it will
2690 2695 * resist the new binding request until the weak
2691 2696 * binding drops. The cpu_surrender or requeueing
2692 2697 * below could be skipped in such cases (since it
2693 2698 * will have no effect), but that would require
2694 2699 * thread_allowmigrate to acquire thread_lock so
2695 2700 * we'll take the very occasional hit here instead.
2696 2701 */
2697 2702 if (tp->t_state == TS_ONPROC) {
2698 2703 cpu_surrender(tp);
2699 2704 } else if (tp->t_state == TS_RUN) {
2700 2705 cpu_t *ocp = tp->t_cpu;
2701 2706
2702 2707 (void) dispdeq(tp);
2703 2708 setbackdq(tp);
2704 2709 /*
2705 2710 * Either on the bound CPU's disp queue now,
2706 2711 * or swapped out or on the swap queue.
2707 2712 */
2708 2713 ASSERT(tp->t_disp_queue == cp->cpu_disp ||
2709 2714 tp->t_weakbound_cpu == ocp ||
2710 2715 (tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ))
2711 2716 != TS_LOAD);
2712 2717 }
2713 2718 }
2714 2719 }
2715 2720
2716 2721 /*
2717 2722 * Our binding has changed; set TP_CHANGEBIND.
2718 2723 */
2719 2724 tp->t_proc_flag |= TP_CHANGEBIND;
2720 2725 aston(tp);
2721 2726
2722 2727 thread_unlock(tp);
2723 2728
2724 2729 return (0);
2725 2730 }
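A short usage sketch, for illustration only: querying a thread's existing binding through this interface. Both cpu_lock and the owning process's p_lock must be held, as the ASSERTs above require; tp and the local variable names are hypothetical.

	processorid_t obind = PBIND_NONE;
	int berr = 0;

	/* tp is a hypothetical kthread_id_t of interest. */
	mutex_enter(&cpu_lock);
	mutex_enter(&ttoproc(tp)->p_lock);
	(void) cpu_bind_thread(tp, PBIND_QUERY, &obind, &berr);
	mutex_exit(&ttoproc(tp)->p_lock);
	mutex_exit(&cpu_lock);
	/* obind now holds the thread's current binding, if any. */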
2726 2731
2727 2732
2728 2733 cpuset_t *
2729 2734 cpuset_alloc(int kmflags)
2730 2735 {
2731 2736 return (kmem_alloc(sizeof (cpuset_t), kmflags));
2732 2737 }
2733 2738
2734 2739 void
2735 2740 cpuset_free(cpuset_t *s)
2736 2741 {
2737 2742 kmem_free(s, sizeof (cpuset_t));
2738 2743 }
2739 2744
2740 2745 void
2741 2746 cpuset_all(cpuset_t *s)
2742 2747 {
2743 2748 int i;
2744 2749
2745 2750 for (i = 0; i < CPUSET_WORDS; i++)
2746 2751 s->cpub[i] = ~0UL;
2747 2752 }
2748 2753
2749 2754 void
2750 2755 cpuset_all_but(cpuset_t *s, const uint_t cpu)
2751 2756 {
2752 2757 cpuset_all(s);
2753 2758 CPUSET_DEL(*s, cpu);
2754 2759 }
2755 2760
2756 2761 void
2757 2762 cpuset_only(cpuset_t *s, const uint_t cpu)
2758 2763 {
2759 2764 CPUSET_ZERO(*s);
2760 2765 CPUSET_ADD(*s, cpu);
2761 2766 }
2762 2767
2763 2768 long
2764 2769 cpu_in_set(const cpuset_t *s, const uint_t cpu)
2765 2770 {
2766 2771 VERIFY(cpu < NCPU);
2767 2772 return (BT_TEST(s->cpub, cpu));
2768 2773 }
2769 2774
2770 2775 void
2771 2776 cpuset_add(cpuset_t *s, const uint_t cpu)
2772 2777 {
2773 2778 VERIFY(cpu < NCPU);
2774 2779 BT_SET(s->cpub, cpu);
2775 2780 }
2776 2781
2777 2782 void
2778 2783 cpuset_del(cpuset_t *s, const uint_t cpu)
2779 2784 {
2780 2785 VERIFY(cpu < NCPU);
2781 2786 BT_CLEAR(s->cpub, cpu);
2782 2787 }
2783 2788
2784 2789 int
2785 2790 cpuset_isnull(const cpuset_t *s)
2786 2791 {
2787 2792 int i;
2788 2793
2789 2794 for (i = 0; i < CPUSET_WORDS; i++) {
2790 2795 if (s->cpub[i] != 0)
2791 2796 return (0);
2792 2797 }
2793 2798 return (1);
2794 2799 }
2795 2800
2796 2801 int
2797 2802 cpuset_isequal(const cpuset_t *s1, const cpuset_t *s2)
2798 2803 {
2799 2804 int i;
2800 2805
2801 2806 for (i = 0; i < CPUSET_WORDS; i++) {
2802 2807 if (s1->cpub[i] != s2->cpub[i])
2803 2808 return (0);
2804 2809 }
2805 2810 return (1);
2806 2811 }
2807 2812
2808 2813 uint_t
2809 2814 cpuset_find(const cpuset_t *s)
2810 2815 {
2811 2816
2812 2817 uint_t i;
2813 2818 uint_t cpu = (uint_t)-1;
2814 2819
2815 2820 /*
2816 2821 * Find a cpu in the cpuset
2817 2822 */
2818 2823 for (i = 0; i < CPUSET_WORDS; i++) {
2819 2824 cpu = (uint_t)(lowbit(s->cpub[i]) - 1);
2820 2825 if (cpu != (uint_t)-1) {
2821 2826 cpu += i * BT_NBIPUL;
2822 2827 break;
2823 2828 }
2824 2829 }
2825 2830 return (cpu);
2826 2831 }
2827 2832
2828 2833 void
2829 2834 cpuset_bounds(const cpuset_t *s, uint_t *smallestid, uint_t *largestid)
2830 2835 {
2831 2836 int i, j;
2832 2837 uint_t bit;
2833 2838
2834 2839 /*
2835 2840 * First, find the smallest cpu id in the set.
2836 2841 */
2837 2842 for (i = 0; i < CPUSET_WORDS; i++) {
2838 2843 if (s->cpub[i] != 0) {
2839 2844 bit = (uint_t)(lowbit(s->cpub[i]) - 1);
2840 2845 ASSERT(bit != (uint_t)-1);
2841 2846 *smallestid = bit + (i * BT_NBIPUL);
2842 2847
2843 2848 /*
2844 2849 * Now find the largest cpu id in
2845 2850 * the set and return immediately.
2846 2851 * Done in an inner loop to avoid
2847 2852 * having to break out of the first
2848 2853 * loop.
2849 2854 */
2850 2855 for (j = CPUSET_WORDS - 1; j >= i; j--) {
2851 2856 if (s->cpub[j] != 0) {
2852 2857 bit = (uint_t)(highbit(s->cpub[j]) - 1);
2853 2858 ASSERT(bit != (uint_t)-1);
2854 2859 *largestid = bit + (j * BT_NBIPUL);
2855 2860 ASSERT(*largestid >= *smallestid);
2856 2861 return;
2857 2862 }
2858 2863 }
2859 2864
2860 2865 /*
2861 2866 * If this code is reached, a
2862 2867 * smallestid was found, but not a
2863 2868 * largestid. The cpuset must have
2864 2869 * been changed during the course
2865 2870 * of this function call.
2866 2871 */
2867 2872 ASSERT(0);
2868 2873 }
2869 2874 }
2870 2875 *smallestid = *largestid = CPUSET_NOTINSET;
2871 2876 }
2872 2877
2873 2878 void
2874 2879 cpuset_atomic_del(cpuset_t *s, const uint_t cpu)
2875 2880 {
2876 2881 VERIFY(cpu < NCPU);
2877 2882 BT_ATOMIC_CLEAR(s->cpub, (cpu))
2878 2883 }
2879 2884
2880 2885 void
2881 2886 cpuset_atomic_add(cpuset_t *s, const uint_t cpu)
2882 2887 {
2883 2888 VERIFY(cpu < NCPU);
2884 2889 BT_ATOMIC_SET(s->cpub, (cpu))
2885 2890 }
2886 2891
2887 2892 long
2888 2893 cpuset_atomic_xadd(cpuset_t *s, const uint_t cpu)
2889 2894 {
2890 2895 long res;
2891 2896
2892 2897 VERIFY(cpu < NCPU);
2893 2898 BT_ATOMIC_SET_EXCL(s->cpub, cpu, res);
2894 2899 return (res);
2895 2900 }
2896 2901
2897 2902 long
2898 2903 cpuset_atomic_xdel(cpuset_t *s, const uint_t cpu)
2899 2904 {
2900 2905 long res;
2901 2906
2902 2907 VERIFY(cpu < NCPU);
2903 2908 BT_ATOMIC_CLEAR_EXCL(s->cpub, cpu, res);
2904 2909 return (res);
2905 2910 }
2906 2911
2907 2912 void
2908 2913 cpuset_or(cpuset_t *dst, cpuset_t *src)
2909 2914 {
2910 2915 for (int i = 0; i < CPUSET_WORDS; i++) {
2911 2916 dst->cpub[i] |= src->cpub[i];
2912 2917 }
2913 2918 }
2914 2919
2915 2920 void
2916 2921 cpuset_xor(cpuset_t *dst, cpuset_t *src)
2917 2922 {
2918 2923 for (int i = 0; i < CPUSET_WORDS; i++) {
2919 2924 dst->cpub[i] ^= src->cpub[i];
2920 2925 }
2921 2926 }
2922 2927
2923 2928 void
2924 2929 cpuset_and(cpuset_t *dst, cpuset_t *src)
2925 2930 {
2926 2931 for (int i = 0; i < CPUSET_WORDS; i++) {
2927 2932 dst->cpub[i] &= src->cpub[i];
2928 2933 }
2929 2934 }
2930 2935
2931 2936 void
2932 2937 cpuset_zero(cpuset_t *dst)
2933 2938 {
2934 2939 for (int i = 0; i < CPUSET_WORDS; i++) {
2935 2940 dst->cpub[i] = 0;
2936 2941 }
2937 2942 }
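For illustration, a brief sketch exercising the cpuset helpers above; the set contents and variable names are hypothetical.

	cpuset_t set;
	uint_t lo, hi;

	cpuset_zero(&set);
	cpuset_add(&set, 1);
	cpuset_add(&set, 5);
	cpuset_bounds(&set, &lo, &hi);	/* lo == 1, hi == 5 */
	ASSERT(cpu_in_set(&set, 5));
	ASSERT(!cpu_in_set(&set, 3));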
2938 2943
2939 2944
2940 2945 /*
2941 2946 * Unbind threads bound to specified CPU.
2942 2947 *
2943 2948 * If `unbind_all_threads' is true, unbind all user threads bound to a given
2944 2949 * CPU. Otherwise unbind all soft-bound user threads.
2945 2950 */
2946 2951 int
2947 2952 cpu_unbind(processorid_t cpu, boolean_t unbind_all_threads)
2948 2953 {
2949 2954 processorid_t obind;
2950 2955 kthread_t *tp;
2951 2956 int ret = 0;
2952 2957 proc_t *pp;
2953 2958 int err, berr = 0;
2954 2959
2955 2960 ASSERT(MUTEX_HELD(&cpu_lock));
2956 2961
2957 2962 mutex_enter(&pidlock);
2958 2963 for (pp = practive; pp != NULL; pp = pp->p_next) {
2959 2964 mutex_enter(&pp->p_lock);
2960 2965 tp = pp->p_tlist;
2961 2966 /*
2962 2967 * Skip zombies, kernel processes, and processes in
2963 2968 * other zones, if called from a non-global zone.
2964 2969 */
2965 2970 if (tp == NULL || (pp->p_flag & SSYS) ||
2966 2971 !HASZONEACCESS(curproc, pp->p_zone->zone_id)) {
2967 2972 mutex_exit(&pp->p_lock);
2968 2973 continue;
2969 2974 }
2970 2975 do {
2971 2976 if (tp->t_bind_cpu != cpu)
2972 2977 continue;
2973 2978 /*
2974 2979 * Skip threads with hard binding when
2975 2980 * `unbind_all_threads' is not specified.
2976 2981 */
2977 2982 if (!unbind_all_threads && TB_CPU_IS_HARD(tp))
2978 2983 continue;
2979 2984 err = cpu_bind_thread(tp, PBIND_NONE, &obind, &berr);
2980 2985 if (ret == 0)
2981 2986 ret = err;
2982 2987 } while ((tp = tp->t_forw) != pp->p_tlist);
2983 2988 mutex_exit(&pp->p_lock);
2984 2989 }
2985 2990 mutex_exit(&pidlock);
2986 2991 if (ret == 0)
2987 2992 ret = berr;
2988 2993 return (ret);
2989 2994 }
2990 2995
2991 2996
2992 2997 /*
2993 2998 * Destroy all remaining bound threads on a cpu.
2994 2999 */
2995 3000 void
2996 3001 cpu_destroy_bound_threads(cpu_t *cp)
2997 3002 {
2998 3003 extern id_t syscid;
2999 3004 register kthread_id_t t, tlist, tnext;
3000 3005
3001 3006 /*
3002 3007 * Destroy all remaining bound threads on the cpu. This
3003 3008 * should include both the interrupt threads and the idle thread.
3004 3009 * This requires some care, since we need to traverse the
3005 3010 * thread list with the pidlock mutex locked, but thread_free
3006 3011 * also locks the pidlock mutex. So, we collect the threads
3007 3012 * we're going to reap in a list headed by "tlist", then we
3008 3013 * unlock the pidlock mutex and traverse the tlist list,
3009 3014 	 * doing thread_free's on the threads. Simple, n'est-ce pas?
3010 3015 * Also, this depends on thread_free not mucking with the
3011 3016 * t_next and t_prev links of the thread.
3012 3017 */
3013 3018
3014 3019 if ((t = curthread) != NULL) {
3015 3020
3016 3021 tlist = NULL;
3017 3022 mutex_enter(&pidlock);
3018 3023 do {
3019 3024 tnext = t->t_next;
3020 3025 if (t->t_bound_cpu == cp) {
3021 3026
3022 3027 /*
3023 3028 * We've found a bound thread, carefully unlink
3024 3029 * it out of the thread list, and add it to
3025 3030 * our "tlist". We "know" we don't have to
3026 3031 * worry about unlinking curthread (the thread
3027 3032 * that is executing this code).
3028 3033 */
3029 3034 t->t_next->t_prev = t->t_prev;
3030 3035 t->t_prev->t_next = t->t_next;
3031 3036 t->t_next = tlist;
3032 3037 tlist = t;
3033 3038 ASSERT(t->t_cid == syscid);
3034 3039 /* wake up anyone blocked in thread_join */
3035 3040 cv_broadcast(&t->t_joincv);
3036 3041 /*
3037 3042 * t_lwp set by interrupt threads and not
3038 3043 * cleared.
3039 3044 */
3040 3045 t->t_lwp = NULL;
3041 3046 /*
3042 3047 * Pause and idle threads always have
3043 3048 * t_state set to TS_ONPROC.
3044 3049 */
3045 3050 t->t_state = TS_FREE;
3046 3051 t->t_prev = NULL; /* Just in case */
3047 3052 }
3048 3053
3049 3054 } while ((t = tnext) != curthread);
3050 3055
3051 3056 mutex_exit(&pidlock);
3052 3057
3053 3058 mutex_sync();
3054 3059 for (t = tlist; t != NULL; t = tnext) {
3055 3060 tnext = t->t_next;
3056 3061 thread_free(t);
3057 3062 }
3058 3063 }
3059 3064 }
3060 3065
3061 3066 /*
3062 3067 * Update the cpu_supp_freqs of this cpu. This information is returned
3063 3068 * as part of cpu_info kstats. If the cpu_info_kstat exists already, then
3064 3069 * maintain the kstat data size.
3065 3070 */
3066 3071 void
3067 3072 cpu_set_supp_freqs(cpu_t *cp, const char *freqs)
3068 3073 {
3069 3074 char clkstr[sizeof ("18446744073709551615") + 1]; /* ui64 MAX */
3070 3075 const char *lfreqs = clkstr;
3071 3076 boolean_t kstat_exists = B_FALSE;
3072 3077 kstat_t *ksp;
3073 3078 size_t len;
3074 3079
3075 3080 /*
3076 3081 * A NULL pointer means we only support one speed.
3077 3082 */
3078 3083 if (freqs == NULL)
3079 3084 (void) snprintf(clkstr, sizeof (clkstr), "%"PRIu64,
3080 3085 cp->cpu_curr_clock);
3081 3086 else
3082 3087 lfreqs = freqs;
3083 3088
3084 3089 /*
3085 3090 * Make sure the frequency doesn't change while a snapshot is
3086 3091 * going on. Of course, we only need to worry about this if
3087 3092 * the kstat exists.
3088 3093 */
3089 3094 if ((ksp = cp->cpu_info_kstat) != NULL) {
3090 3095 mutex_enter(ksp->ks_lock);
3091 3096 kstat_exists = B_TRUE;
3092 3097 }
3093 3098
3094 3099 /*
3095 3100 * Free any previously allocated string and if the kstat
3096 3101 * already exists, then update its data size.
3097 3102 */
3098 3103 if (cp->cpu_supp_freqs != NULL) {
3099 3104 len = strlen(cp->cpu_supp_freqs) + 1;
3100 3105 kmem_free(cp->cpu_supp_freqs, len);
3101 3106 if (kstat_exists)
3102 3107 ksp->ks_data_size -= len;
3103 3108 }
3104 3109
3105 3110 /*
3106 3111 * Allocate the new string and set the pointer.
3107 3112 */
3108 3113 len = strlen(lfreqs) + 1;
3109 3114 cp->cpu_supp_freqs = kmem_alloc(len, KM_SLEEP);
3110 3115 (void) strcpy(cp->cpu_supp_freqs, lfreqs);
3111 3116
3112 3117 /*
3113 3118 * If the kstat already exists then update the data size and
3114 3119 * free the lock.
3115 3120 */
3116 3121 if (kstat_exists) {
3117 3122 ksp->ks_data_size += len;
3118 3123 mutex_exit(ksp->ks_lock);
3119 3124 }
3120 3125 }
3121 3126
3122 3127 /*
3123 3128  * Indicate the current CPU's clock frequency (in Hz).
3124 3129 * The calling context must be such that CPU references are safe.
3125 3130 */
3126 3131 void
3127 3132 cpu_set_curr_clock(uint64_t new_clk)
3128 3133 {
3129 3134 uint64_t old_clk;
3130 3135
3131 3136 old_clk = CPU->cpu_curr_clock;
3132 3137 CPU->cpu_curr_clock = new_clk;
3133 3138
3134 3139 /*
3135 3140 * The cpu-change-speed DTrace probe exports the frequency in Hz
3136 3141 */
3137 3142 DTRACE_PROBE3(cpu__change__speed, processorid_t, CPU->cpu_id,
3138 3143 uint64_t, old_clk, uint64_t, new_clk);
3139 3144 }
3140 3145
3141 3146 /*
3142 3147 * processor_info(2) and p_online(2) status support functions
3143 3148 * The constants returned by the cpu_get_state() and cpu_get_state_str() are
3144 3149  * The constants returned by cpu_get_state() and cpu_get_state_str() are
3145 3150 * subsystems should only be using the cpu_flags value directly. Subsystems
3146 3151 * modifying cpu_flags should record the state change via a call to the
3147 3152  * modifying cpu_flags should record the state change via a call to
3148 3153  * cpu_set_state().
3149 3154
3150 3155 /*
3151 3156 * Update the pi_state of this CPU. This function provides the CPU status for
3152 3157 * the information returned by processor_info(2).
3153 3158 */
3154 3159 void
3155 3160 cpu_set_state(cpu_t *cpu)
3156 3161 {
3157 3162 ASSERT(MUTEX_HELD(&cpu_lock));
3158 3163 cpu->cpu_type_info.pi_state = cpu_get_state(cpu);
3159 3164 cpu->cpu_state_begin = gethrestime_sec();
3160 3165 pool_cpu_mod = gethrtime();
3161 3166 }
3162 3167
3163 3168 /*
3164 3169 * Return offline/online/other status for the indicated CPU. Use only for
3165 3170 * communication with user applications; cpu_flags provides the in-kernel
3166 3171 * interface.
3167 3172 */
3168 3173 int
3169 3174 cpu_get_state(cpu_t *cpu)
3170 3175 {
3171 3176 ASSERT(MUTEX_HELD(&cpu_lock));
3172 3177 if (cpu->cpu_flags & CPU_POWEROFF)
3173 3178 return (P_POWEROFF);
3174 3179 else if (cpu->cpu_flags & CPU_FAULTED)
3175 3180 return (P_FAULTED);
3176 3181 else if (cpu->cpu_flags & CPU_SPARE)
3177 3182 return (P_SPARE);
3178 3183 else if ((cpu->cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY)
3179 3184 return (P_OFFLINE);
3180 3185 else if (cpu->cpu_flags & CPU_ENABLE)
3181 3186 return (P_ONLINE);
3182 3187 else
3183 3188 return (P_NOINTR);
3184 3189 }
3185 3190
3186 3191 /*
3187 3192 * Return processor_info(2) state as a string.
3188 3193 */
3189 3194 const char *
3190 3195 cpu_get_state_str(cpu_t *cpu)
3191 3196 {
3192 3197 const char *string;
3193 3198
3194 3199 switch (cpu_get_state(cpu)) {
3195 3200 case P_ONLINE:
3196 3201 string = PS_ONLINE;
3197 3202 break;
3198 3203 case P_POWEROFF:
3199 3204 string = PS_POWEROFF;
3200 3205 break;
3201 3206 case P_NOINTR:
3202 3207 string = PS_NOINTR;
3203 3208 break;
3204 3209 case P_SPARE:
3205 3210 string = PS_SPARE;
3206 3211 break;
3207 3212 case P_FAULTED:
3208 3213 string = PS_FAULTED;
3209 3214 break;
3210 3215 case P_OFFLINE:
3211 3216 string = PS_OFFLINE;
3212 3217 break;
3213 3218 default:
3214 3219 string = "unknown";
3215 3220 break;
3216 3221 }
3217 3222 return (string);
3218 3223 }
3219 3224
3220 3225 /*
3221 3226 * Export this CPU's statistics (cpu_stat_t and cpu_stats_t) as raw and named
3222 3227 * kstats, respectively. This is done when a CPU is initialized or placed
3223 3228 * online via p_online(2).
3224 3229 */
3225 3230 static void
3226 3231 cpu_stats_kstat_create(cpu_t *cp)
3227 3232 {
3228 3233 int instance = cp->cpu_id;
3229 3234 char *module = "cpu";
3230 3235 char *class = "misc";
3231 3236 kstat_t *ksp;
3232 3237 zoneid_t zoneid;
3233 3238
3234 3239 ASSERT(MUTEX_HELD(&cpu_lock));
3235 3240
3236 3241 if (pool_pset_enabled())
3237 3242 zoneid = GLOBAL_ZONEID;
3238 3243 else
3239 3244 zoneid = ALL_ZONES;
3240 3245 /*
3241 3246 * Create named kstats
3242 3247 */
3243 3248 #define CPU_STATS_KS_CREATE(name, tsize, update_func) \
3244 3249 ksp = kstat_create_zone(module, instance, (name), class, \
3245 3250 KSTAT_TYPE_NAMED, (tsize) / sizeof (kstat_named_t), 0, \
3246 3251 zoneid); \
3247 3252 if (ksp != NULL) { \
3248 3253 ksp->ks_private = cp; \
3249 3254 ksp->ks_update = (update_func); \
3250 3255 kstat_install(ksp); \
3251 3256 } else \
3252 3257 cmn_err(CE_WARN, "cpu: unable to create %s:%d:%s kstat", \
3253 3258 module, instance, (name));
3254 3259
3255 3260 CPU_STATS_KS_CREATE("sys", sizeof (cpu_sys_stats_ks_data_template),
3256 3261 cpu_sys_stats_ks_update);
3257 3262 CPU_STATS_KS_CREATE("vm", sizeof (cpu_vm_stats_ks_data_template),
3258 3263 cpu_vm_stats_ks_update);
3259 3264
3260 3265 /*
3261 3266 * Export the familiar cpu_stat_t KSTAT_TYPE_RAW kstat.
3262 3267 */
3263 3268 ksp = kstat_create_zone("cpu_stat", cp->cpu_id, NULL,
3264 3269 "misc", KSTAT_TYPE_RAW, sizeof (cpu_stat_t), 0, zoneid);
3265 3270 if (ksp != NULL) {
3266 3271 ksp->ks_update = cpu_stat_ks_update;
3267 3272 ksp->ks_private = cp;
3268 3273 kstat_install(ksp);
3269 3274 }
3270 3275 }
3271 3276
3272 3277 static void
3273 3278 cpu_stats_kstat_destroy(cpu_t *cp)
3274 3279 {
3275 3280 char ks_name[KSTAT_STRLEN];
3276 3281
3277 3282 (void) sprintf(ks_name, "cpu_stat%d", cp->cpu_id);
3278 3283 kstat_delete_byname("cpu_stat", cp->cpu_id, ks_name);
3279 3284
3280 3285 kstat_delete_byname("cpu", cp->cpu_id, "sys");
3281 3286 kstat_delete_byname("cpu", cp->cpu_id, "vm");
3282 3287 }
3283 3288
3284 3289 static int
3285 3290 cpu_sys_stats_ks_update(kstat_t *ksp, int rw)
3286 3291 {
3287 3292 cpu_t *cp = (cpu_t *)ksp->ks_private;
3288 3293 struct cpu_sys_stats_ks_data *csskd;
3289 3294 cpu_sys_stats_t *css;
3290 3295 hrtime_t msnsecs[NCMSTATES];
3291 3296 int i;
3292 3297
3293 3298 if (rw == KSTAT_WRITE)
3294 3299 return (EACCES);
3295 3300
3296 3301 csskd = ksp->ks_data;
3297 3302 css = &cp->cpu_stats.sys;
3298 3303
3299 3304 /*
3300 3305 * Read CPU mstate, but compare with the last values we
3301 3306 * received to make sure that the returned kstats never
3302 3307 * decrease.
3303 3308 */
3304 3309
3305 3310 get_cpu_mstate(cp, msnsecs);
3306 3311 if (csskd->cpu_nsec_idle.value.ui64 > msnsecs[CMS_IDLE])
3307 3312 msnsecs[CMS_IDLE] = csskd->cpu_nsec_idle.value.ui64;
3308 3313 if (csskd->cpu_nsec_user.value.ui64 > msnsecs[CMS_USER])
3309 3314 msnsecs[CMS_USER] = csskd->cpu_nsec_user.value.ui64;
3310 3315 if (csskd->cpu_nsec_kernel.value.ui64 > msnsecs[CMS_SYSTEM])
3311 3316 msnsecs[CMS_SYSTEM] = csskd->cpu_nsec_kernel.value.ui64;
3312 3317
3313 3318 bcopy(&cpu_sys_stats_ks_data_template, ksp->ks_data,
3314 3319 sizeof (cpu_sys_stats_ks_data_template));
3315 3320
3316 3321 csskd->cpu_ticks_wait.value.ui64 = 0;
3317 3322 csskd->wait_ticks_io.value.ui64 = 0;
3318 3323
3319 3324 csskd->cpu_nsec_idle.value.ui64 = msnsecs[CMS_IDLE];
3320 3325 csskd->cpu_nsec_user.value.ui64 = msnsecs[CMS_USER];
3321 3326 csskd->cpu_nsec_kernel.value.ui64 = msnsecs[CMS_SYSTEM];
3322 3327 csskd->cpu_ticks_idle.value.ui64 =
3323 3328 NSEC_TO_TICK(csskd->cpu_nsec_idle.value.ui64);
3324 3329 csskd->cpu_ticks_user.value.ui64 =
3325 3330 NSEC_TO_TICK(csskd->cpu_nsec_user.value.ui64);
3326 3331 csskd->cpu_ticks_kernel.value.ui64 =
3327 3332 NSEC_TO_TICK(csskd->cpu_nsec_kernel.value.ui64);
3328 3333 csskd->cpu_nsec_dtrace.value.ui64 = cp->cpu_dtrace_nsec;
3329 3334 csskd->dtrace_probes.value.ui64 = cp->cpu_dtrace_probes;
3330 3335 csskd->cpu_nsec_intr.value.ui64 = cp->cpu_intrlast;
3331 3336 csskd->cpu_load_intr.value.ui64 = cp->cpu_intrload;
3332 3337 csskd->bread.value.ui64 = css->bread;
3333 3338 csskd->bwrite.value.ui64 = css->bwrite;
3334 3339 csskd->lread.value.ui64 = css->lread;
3335 3340 csskd->lwrite.value.ui64 = css->lwrite;
3336 3341 csskd->phread.value.ui64 = css->phread;
3337 3342 csskd->phwrite.value.ui64 = css->phwrite;
3338 3343 csskd->pswitch.value.ui64 = css->pswitch;
3339 3344 csskd->trap.value.ui64 = css->trap;
3340 3345 csskd->intr.value.ui64 = 0;
3341 3346 for (i = 0; i < PIL_MAX; i++)
3342 3347 csskd->intr.value.ui64 += css->intr[i];
3343 3348 csskd->syscall.value.ui64 = css->syscall;
3344 3349 csskd->sysread.value.ui64 = css->sysread;
3345 3350 csskd->syswrite.value.ui64 = css->syswrite;
3346 3351 csskd->sysfork.value.ui64 = css->sysfork;
3347 3352 csskd->sysvfork.value.ui64 = css->sysvfork;
3348 3353 csskd->sysexec.value.ui64 = css->sysexec;
3349 3354 csskd->readch.value.ui64 = css->readch;
3350 3355 csskd->writech.value.ui64 = css->writech;
3351 3356 csskd->rcvint.value.ui64 = css->rcvint;
3352 3357 csskd->xmtint.value.ui64 = css->xmtint;
3353 3358 csskd->mdmint.value.ui64 = css->mdmint;
3354 3359 csskd->rawch.value.ui64 = css->rawch;
3355 3360 csskd->canch.value.ui64 = css->canch;
3356 3361 csskd->outch.value.ui64 = css->outch;
3357 3362 csskd->msg.value.ui64 = css->msg;
3358 3363 csskd->sema.value.ui64 = css->sema;
3359 3364 csskd->namei.value.ui64 = css->namei;
3360 3365 csskd->ufsiget.value.ui64 = css->ufsiget;
3361 3366 csskd->ufsdirblk.value.ui64 = css->ufsdirblk;
3362 3367 csskd->ufsipage.value.ui64 = css->ufsipage;
3363 3368 csskd->ufsinopage.value.ui64 = css->ufsinopage;
3364 3369 csskd->procovf.value.ui64 = css->procovf;
3365 3370 csskd->intrthread.value.ui64 = 0;
3366 3371 for (i = 0; i < LOCK_LEVEL - 1; i++)
3367 3372 csskd->intrthread.value.ui64 += css->intr[i];
3368 3373 csskd->intrblk.value.ui64 = css->intrblk;
3369 3374 csskd->intrunpin.value.ui64 = css->intrunpin;
3370 3375 csskd->idlethread.value.ui64 = css->idlethread;
3371 3376 csskd->inv_swtch.value.ui64 = css->inv_swtch;
3372 3377 csskd->nthreads.value.ui64 = css->nthreads;
3373 3378 csskd->cpumigrate.value.ui64 = css->cpumigrate;
3374 3379 csskd->xcalls.value.ui64 = css->xcalls;
3375 3380 csskd->mutex_adenters.value.ui64 = css->mutex_adenters;
3376 3381 csskd->rw_rdfails.value.ui64 = css->rw_rdfails;
3377 3382 csskd->rw_wrfails.value.ui64 = css->rw_wrfails;
3378 3383 csskd->modload.value.ui64 = css->modload;
3379 3384 csskd->modunload.value.ui64 = css->modunload;
3380 3385 csskd->bawrite.value.ui64 = css->bawrite;
3381 3386 csskd->iowait.value.ui64 = css->iowait;
3382 3387
3383 3388 return (0);
3384 3389 }
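The "never decrease" comparison above guards against a fresh microstate read coming back smaller than a value already reported to consumers: each statistic is clamped to the maximum of the new reading and the last exported value. A minimal sketch of that clamp in isolation, using a hypothetical helper name:

	/*
	 * Hypothetical helper illustrating the monotonic clamp used in
	 * cpu_sys_stats_ks_update(): never report a value smaller than
	 * the one previously exported, so consumers always see
	 * non-decreasing counters.
	 */
	static uint64_t
	clamp_monotonic(uint64_t previously_reported, uint64_t fresh_reading)
	{
		return (fresh_reading >= previously_reported ?
		    fresh_reading : previously_reported);
	}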
3385 3390
3386 3391 static int
3387 3392 cpu_vm_stats_ks_update(kstat_t *ksp, int rw)
3388 3393 {
3389 3394 cpu_t *cp = (cpu_t *)ksp->ks_private;
3390 3395 struct cpu_vm_stats_ks_data *cvskd;
3391 3396 cpu_vm_stats_t *cvs;
3392 3397
3393 3398 if (rw == KSTAT_WRITE)
3394 3399 return (EACCES);
3395 3400
3396 3401 cvs = &cp->cpu_stats.vm;
3397 3402 cvskd = ksp->ks_data;
3398 3403
3399 3404 bcopy(&cpu_vm_stats_ks_data_template, ksp->ks_data,
3400 3405 sizeof (cpu_vm_stats_ks_data_template));
3401 3406 cvskd->pgrec.value.ui64 = cvs->pgrec;
3402 3407 cvskd->pgfrec.value.ui64 = cvs->pgfrec;
3403 3408 cvskd->pgin.value.ui64 = cvs->pgin;
3404 3409 cvskd->pgpgin.value.ui64 = cvs->pgpgin;
3405 3410 cvskd->pgout.value.ui64 = cvs->pgout;
3406 3411 cvskd->pgpgout.value.ui64 = cvs->pgpgout;
3407 3412 cvskd->swapin.value.ui64 = cvs->swapin;
3408 3413 cvskd->pgswapin.value.ui64 = cvs->pgswapin;
3409 3414 cvskd->swapout.value.ui64 = cvs->swapout;
3410 3415 cvskd->pgswapout.value.ui64 = cvs->pgswapout;
3411 3416 cvskd->zfod.value.ui64 = cvs->zfod;
3412 3417 cvskd->dfree.value.ui64 = cvs->dfree;
3413 3418 cvskd->scan.value.ui64 = cvs->scan;
3414 3419 cvskd->rev.value.ui64 = cvs->rev;
3415 3420 cvskd->hat_fault.value.ui64 = cvs->hat_fault;
3416 3421 cvskd->as_fault.value.ui64 = cvs->as_fault;
3417 3422 cvskd->maj_fault.value.ui64 = cvs->maj_fault;
3418 3423 cvskd->cow_fault.value.ui64 = cvs->cow_fault;
3419 3424 cvskd->prot_fault.value.ui64 = cvs->prot_fault;
3420 3425 cvskd->softlock.value.ui64 = cvs->softlock;
3421 3426 cvskd->kernel_asflt.value.ui64 = cvs->kernel_asflt;
3422 3427 cvskd->pgrrun.value.ui64 = cvs->pgrrun;
3423 3428 cvskd->execpgin.value.ui64 = cvs->execpgin;
3424 3429 cvskd->execpgout.value.ui64 = cvs->execpgout;
3425 3430 cvskd->execfree.value.ui64 = cvs->execfree;
3426 3431 cvskd->anonpgin.value.ui64 = cvs->anonpgin;
3427 3432 cvskd->anonpgout.value.ui64 = cvs->anonpgout;
3428 3433 cvskd->anonfree.value.ui64 = cvs->anonfree;
3429 3434 cvskd->fspgin.value.ui64 = cvs->fspgin;
3430 3435 cvskd->fspgout.value.ui64 = cvs->fspgout;
3431 3436 cvskd->fsfree.value.ui64 = cvs->fsfree;
3432 3437
3433 3438 return (0);
3434 3439 }
3435 3440
3436 3441 static int
3437 3442 cpu_stat_ks_update(kstat_t *ksp, int rw)
3438 3443 {
3439 3444 cpu_stat_t *cso;
3440 3445 cpu_t *cp;
3441 3446 int i;
3442 3447 hrtime_t msnsecs[NCMSTATES];
3443 3448
3444 3449 cso = (cpu_stat_t *)ksp->ks_data;
3445 3450 cp = (cpu_t *)ksp->ks_private;
3446 3451
3447 3452 if (rw == KSTAT_WRITE)
3448 3453 return (EACCES);
3449 3454
3450 3455 /*
3451 3456 * Read CPU mstate, but compare with the last values we
3452 3457 * received to make sure that the returned kstats never
3453 3458 * decrease.
3454 3459 */
3455 3460
3456 3461 get_cpu_mstate(cp, msnsecs);
3457 3462 msnsecs[CMS_IDLE] = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
3458 3463 msnsecs[CMS_USER] = NSEC_TO_TICK(msnsecs[CMS_USER]);
3459 3464 msnsecs[CMS_SYSTEM] = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
3460 3465 if (cso->cpu_sysinfo.cpu[CPU_IDLE] < msnsecs[CMS_IDLE])
3461 3466 cso->cpu_sysinfo.cpu[CPU_IDLE] = msnsecs[CMS_IDLE];
3462 3467 if (cso->cpu_sysinfo.cpu[CPU_USER] < msnsecs[CMS_USER])
3463 3468 cso->cpu_sysinfo.cpu[CPU_USER] = msnsecs[CMS_USER];
3464 3469 if (cso->cpu_sysinfo.cpu[CPU_KERNEL] < msnsecs[CMS_SYSTEM])
3465 3470 cso->cpu_sysinfo.cpu[CPU_KERNEL] = msnsecs[CMS_SYSTEM];
3466 3471 cso->cpu_sysinfo.cpu[CPU_WAIT] = 0;
3467 3472 cso->cpu_sysinfo.wait[W_IO] = 0;
3468 3473 cso->cpu_sysinfo.wait[W_SWAP] = 0;
3469 3474 cso->cpu_sysinfo.wait[W_PIO] = 0;
3470 3475 cso->cpu_sysinfo.bread = CPU_STATS(cp, sys.bread);
3471 3476 cso->cpu_sysinfo.bwrite = CPU_STATS(cp, sys.bwrite);
3472 3477 cso->cpu_sysinfo.lread = CPU_STATS(cp, sys.lread);
3473 3478 cso->cpu_sysinfo.lwrite = CPU_STATS(cp, sys.lwrite);
3474 3479 cso->cpu_sysinfo.phread = CPU_STATS(cp, sys.phread);
3475 3480 cso->cpu_sysinfo.phwrite = CPU_STATS(cp, sys.phwrite);
3476 3481 cso->cpu_sysinfo.pswitch = CPU_STATS(cp, sys.pswitch);
3477 3482 cso->cpu_sysinfo.trap = CPU_STATS(cp, sys.trap);
3478 3483 cso->cpu_sysinfo.intr = 0;
3479 3484 for (i = 0; i < PIL_MAX; i++)
3480 3485 cso->cpu_sysinfo.intr += CPU_STATS(cp, sys.intr[i]);
3481 3486 cso->cpu_sysinfo.syscall = CPU_STATS(cp, sys.syscall);
3482 3487 cso->cpu_sysinfo.sysread = CPU_STATS(cp, sys.sysread);
3483 3488 cso->cpu_sysinfo.syswrite = CPU_STATS(cp, sys.syswrite);
3484 3489 cso->cpu_sysinfo.sysfork = CPU_STATS(cp, sys.sysfork);
3485 3490 cso->cpu_sysinfo.sysvfork = CPU_STATS(cp, sys.sysvfork);
3486 3491 cso->cpu_sysinfo.sysexec = CPU_STATS(cp, sys.sysexec);
3487 3492 cso->cpu_sysinfo.readch = CPU_STATS(cp, sys.readch);
3488 3493 cso->cpu_sysinfo.writech = CPU_STATS(cp, sys.writech);
3489 3494 cso->cpu_sysinfo.rcvint = CPU_STATS(cp, sys.rcvint);
3490 3495 cso->cpu_sysinfo.xmtint = CPU_STATS(cp, sys.xmtint);
3491 3496 cso->cpu_sysinfo.mdmint = CPU_STATS(cp, sys.mdmint);
3492 3497 cso->cpu_sysinfo.rawch = CPU_STATS(cp, sys.rawch);
3493 3498 cso->cpu_sysinfo.canch = CPU_STATS(cp, sys.canch);
3494 3499 cso->cpu_sysinfo.outch = CPU_STATS(cp, sys.outch);
3495 3500 cso->cpu_sysinfo.msg = CPU_STATS(cp, sys.msg);
3496 3501 cso->cpu_sysinfo.sema = CPU_STATS(cp, sys.sema);
3497 3502 cso->cpu_sysinfo.namei = CPU_STATS(cp, sys.namei);
3498 3503 cso->cpu_sysinfo.ufsiget = CPU_STATS(cp, sys.ufsiget);
3499 3504 cso->cpu_sysinfo.ufsdirblk = CPU_STATS(cp, sys.ufsdirblk);
3500 3505 cso->cpu_sysinfo.ufsipage = CPU_STATS(cp, sys.ufsipage);
3501 3506 cso->cpu_sysinfo.ufsinopage = CPU_STATS(cp, sys.ufsinopage);
3502 3507 cso->cpu_sysinfo.inodeovf = 0;
3503 3508 cso->cpu_sysinfo.fileovf = 0;
3504 3509 cso->cpu_sysinfo.procovf = CPU_STATS(cp, sys.procovf);
3505 3510 cso->cpu_sysinfo.intrthread = 0;
3506 3511 for (i = 0; i < LOCK_LEVEL - 1; i++)
3507 3512 cso->cpu_sysinfo.intrthread += CPU_STATS(cp, sys.intr[i]);
3508 3513 cso->cpu_sysinfo.intrblk = CPU_STATS(cp, sys.intrblk);
3509 3514 cso->cpu_sysinfo.idlethread = CPU_STATS(cp, sys.idlethread);
3510 3515 cso->cpu_sysinfo.inv_swtch = CPU_STATS(cp, sys.inv_swtch);
3511 3516 cso->cpu_sysinfo.nthreads = CPU_STATS(cp, sys.nthreads);
3512 3517 cso->cpu_sysinfo.cpumigrate = CPU_STATS(cp, sys.cpumigrate);
3513 3518 cso->cpu_sysinfo.xcalls = CPU_STATS(cp, sys.xcalls);
3514 3519 cso->cpu_sysinfo.mutex_adenters = CPU_STATS(cp, sys.mutex_adenters);
3515 3520 cso->cpu_sysinfo.rw_rdfails = CPU_STATS(cp, sys.rw_rdfails);
3516 3521 cso->cpu_sysinfo.rw_wrfails = CPU_STATS(cp, sys.rw_wrfails);
3517 3522 cso->cpu_sysinfo.modload = CPU_STATS(cp, sys.modload);
3518 3523 cso->cpu_sysinfo.modunload = CPU_STATS(cp, sys.modunload);
3519 3524 cso->cpu_sysinfo.bawrite = CPU_STATS(cp, sys.bawrite);
3520 3525 cso->cpu_sysinfo.rw_enters = 0;
3521 3526 cso->cpu_sysinfo.win_uo_cnt = 0;
3522 3527 cso->cpu_sysinfo.win_uu_cnt = 0;
3523 3528 cso->cpu_sysinfo.win_so_cnt = 0;
3524 3529 cso->cpu_sysinfo.win_su_cnt = 0;
3525 3530 cso->cpu_sysinfo.win_suo_cnt = 0;
3526 3531
3527 3532 cso->cpu_syswait.iowait = CPU_STATS(cp, sys.iowait);
3528 3533 cso->cpu_syswait.swap = 0;
3529 3534 cso->cpu_syswait.physio = 0;
3530 3535
3531 3536 cso->cpu_vminfo.pgrec = CPU_STATS(cp, vm.pgrec);
3532 3537 cso->cpu_vminfo.pgfrec = CPU_STATS(cp, vm.pgfrec);
3533 3538 cso->cpu_vminfo.pgin = CPU_STATS(cp, vm.pgin);
3534 3539 cso->cpu_vminfo.pgpgin = CPU_STATS(cp, vm.pgpgin);
3535 3540 cso->cpu_vminfo.pgout = CPU_STATS(cp, vm.pgout);
3536 3541 cso->cpu_vminfo.pgpgout = CPU_STATS(cp, vm.pgpgout);
3537 3542 cso->cpu_vminfo.swapin = CPU_STATS(cp, vm.swapin);
3538 3543 cso->cpu_vminfo.pgswapin = CPU_STATS(cp, vm.pgswapin);
3539 3544 cso->cpu_vminfo.swapout = CPU_STATS(cp, vm.swapout);
3540 3545 cso->cpu_vminfo.pgswapout = CPU_STATS(cp, vm.pgswapout);
3541 3546 cso->cpu_vminfo.zfod = CPU_STATS(cp, vm.zfod);
3542 3547 cso->cpu_vminfo.dfree = CPU_STATS(cp, vm.dfree);
3543 3548 cso->cpu_vminfo.scan = CPU_STATS(cp, vm.scan);
3544 3549 cso->cpu_vminfo.rev = CPU_STATS(cp, vm.rev);
3545 3550 cso->cpu_vminfo.hat_fault = CPU_STATS(cp, vm.hat_fault);
3546 3551 cso->cpu_vminfo.as_fault = CPU_STATS(cp, vm.as_fault);
3547 3552 cso->cpu_vminfo.maj_fault = CPU_STATS(cp, vm.maj_fault);
3548 3553 cso->cpu_vminfo.cow_fault = CPU_STATS(cp, vm.cow_fault);
3549 3554 cso->cpu_vminfo.prot_fault = CPU_STATS(cp, vm.prot_fault);
3550 3555 cso->cpu_vminfo.softlock = CPU_STATS(cp, vm.softlock);
3551 3556 cso->cpu_vminfo.kernel_asflt = CPU_STATS(cp, vm.kernel_asflt);
3552 3557 cso->cpu_vminfo.pgrrun = CPU_STATS(cp, vm.pgrrun);
3553 3558 cso->cpu_vminfo.execpgin = CPU_STATS(cp, vm.execpgin);
3554 3559 cso->cpu_vminfo.execpgout = CPU_STATS(cp, vm.execpgout);
3555 3560 cso->cpu_vminfo.execfree = CPU_STATS(cp, vm.execfree);
3556 3561 cso->cpu_vminfo.anonpgin = CPU_STATS(cp, vm.anonpgin);
3557 3562 cso->cpu_vminfo.anonpgout = CPU_STATS(cp, vm.anonpgout);
3558 3563 cso->cpu_vminfo.anonfree = CPU_STATS(cp, vm.anonfree);
3559 3564 cso->cpu_vminfo.fspgin = CPU_STATS(cp, vm.fspgin);
3560 3565 cso->cpu_vminfo.fspgout = CPU_STATS(cp, vm.fspgout);
3561 3566 cso->cpu_vminfo.fsfree = CPU_STATS(cp, vm.fsfree);
3562 3567
3563 3568 return (0);
3564 3569 }
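Unlike the named cpu:<id>:sys and cpu:<id>:vm kstats, the legacy kstat filled in by cpu_stat_ks_update() is KSTAT_TYPE_RAW and is read as a binary cpu_stat_t. A minimal sketch of such a reader, again illustrative only and assuming CPU 0; a NULL name in kstat_lookup() matches the generated "cpu_statN" name.

	/*
	 * Illustrative reader of the legacy cpu_stat KSTAT_TYPE_RAW kstat.
	 * The data is a binary cpu_stat_t (<sys/sysinfo.h>).  Build with
	 * -lkstat.
	 */
	#include <kstat.h>
	#include <sys/sysinfo.h>
	#include <stdio.h>

	int
	main(void)
	{
		kstat_ctl_t *kc = kstat_open();
		kstat_t *ksp;
		cpu_stat_t cs;

		if (kc == NULL)
			return (1);

		/* NULL name matches the generated "cpu_statN"; CPU 0 assumed. */
		ksp = kstat_lookup(kc, "cpu_stat", 0, NULL);
		if (ksp == NULL || kstat_read(kc, ksp, &cs) == -1) {
			(void) kstat_close(kc);
			return (1);
		}

		(void) printf("cpu0 pswitch: %u\n", cs.cpu_sysinfo.pswitch);

		(void) kstat_close(kc);
		return (0);
	}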
[ 2015 lines elided ]