1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2011 Joyent, Inc. All rights reserved.
26 */
27
28 #include <sys/param.h>
29 #include <sys/types.h>
30 #include <sys/sysmacros.h>
31 #include <sys/systm.h>
32 #include <sys/errno.h>
33 #include <sys/vfs.h>
34 #include <sys/vnode.h>
35 #include <sys/swap.h>
36 #include <sys/file.h>
37 #include <sys/proc.h>
38 #include <sys/var.h>
39 #include <sys/uadmin.h>
40 #include <sys/signal.h>
41 #include <sys/time.h>
42 #include <vm/seg_kmem.h>
43 #include <sys/modctl.h>
44 #include <sys/callb.h>
45 #include <sys/dumphdr.h>
46 #include <sys/debug.h>
47 #include <sys/ftrace.h>
48 #include <sys/cmn_err.h>
49 #include <sys/panic.h>
50 #include <sys/ddi.h>
51 #include <sys/sunddi.h>
52 #include <sys/policy.h>
53 #include <sys/zone.h>
54 #include <sys/condvar.h>
55 #include <sys/thread.h>
56 #include <sys/sdt.h>
57
58 /*
59 * Administrivia system call. We provide this in two flavors: one for calling
60 * from the system call path (uadmin), and the other for calling from elsewhere
61 * within the kernel (kadmin). Callers must beware that certain uadmin cmd
62 * values (specifically A_SWAPCTL) are only supported by uadmin and not kadmin.
63 */
64
/*
 * Defined elsewhere. kadmin() acquires it with sema_p() during A_SHUTDOWN,
 * before unmounting filesystems, and never releases it.
 */
extern ksema_t fsflush_sema;

/*
 * ualock and uacond serialize the A_SHUTDOWN, A_REBOOT, A_REMOUNT and
 * A_CONFIG commands in kadmin(). ua_shutdown_thread is the thread currently
 * running one of those commands, or NULL when none is in progress; it is
 * read and written under ualock.
 */
kmutex_t ualock;
kcondvar_t uacond;
kthread_t *ua_shutdown_thread = NULL;

/* Set to 1 by kadmin() once A_SHUTDOWN processing is under way. */
int sys_shutdown = 0;
/*
 * NOTE(review): not referenced by the code visible in this part of the
 * file; presumably consulted by the fast-reboot dry-run machinery -- confirm.
 */
volatile int fastreboot_dryrun = 0;
72
73 /*
74 * Kill all user processes in said zone. A special argument of ALL_ZONES is
75 * passed in when the system as a whole is shutting down. The lack of per-zone
76 * process lists is likely to make the following a performance bottleneck on a
77 * system with many zones.
78 */
79 void
80 killall(zoneid_t zoneid, boolean_t force)
81 {
82 proc_t *p;
83
84 ASSERT(zoneid != GLOBAL_ZONEID);
85 /*
86 * Kill all processes except kernel daemons and ourself.
87 * Make a first pass to stop all processes so they won't
88 * be trying to restart children as we kill them.
89 */
90 mutex_enter(&pidlock);
91 for (p = practive; p != NULL; p = p->p_next) {
92 if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
93 p->p_exec != NULLVP && /* kernel daemons */
94 p->p_as != &kas &&
95 p->p_stat != SZOMB) {
96 mutex_enter(&p->p_lock);
97 p->p_flag |= SNOWAIT;
98 sigtoproc(p, NULL, SIGSTOP);
99 mutex_exit(&p->p_lock);
100 }
101 }
102 p = practive;
103 while (p != NULL) {
104 if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
105 p->p_exec != NULLVP && /* kernel daemons */
106 p->p_as != &kas &&
107 p->p_stat != SIDL &&
108 p->p_stat != SZOMB) {
109 mutex_enter(&p->p_lock);
110 if (!force && sigismember(&p->p_sig, SIGKILL)) {
111 mutex_exit(&p->p_lock);
112 p = p->p_next;
113 } else {
114 sigtoproc(p, NULL, SIGKILL);
115 mutex_exit(&p->p_lock);
116 (void) cv_reltimedwait(&p->p_srwchan_cv,
117 &pidlock, hz, TR_CLOCK_TICK);
118 p = practive;
119 }
120 } else {
121 p = p->p_next;
122 }
123 }
124 mutex_exit(&pidlock);
125 }
126
/*
 * In-kernel entry point for the administrative commands.
 *
 *	cmd	One of A_FTRACE, A_SHUTDOWN, A_REBOOT, A_REMOUNT, A_FREEZE,
 *		A_DUMP, A_SDTTEST or A_CONFIG. Anything else (including
 *		A_SWAPCTL) is rejected with EINVAL.
 *	fcn	Command-specific function code (AD_*).
 *	mdep	Command-specific argument. For A_SHUTDOWN, A_REBOOT and
 *		A_DUMP a non-NULL value whose first character is '/' is
 *		first run through i_convert_boot_device_name(); otherwise it
 *		is passed on unchanged. A_FREEZE hands it to cpr() and
 *		A_CONFIG/AD_UPDATE_BOOT_CONFIG hands it to
 *		fastboot_update_config() (non-sparc only).
 *	credp	Credentials checked with secpolicy_sys_config().
 *
 * Returns 0 on success or an errno value: EPERM when the policy check
 * fails, EINVAL for an unknown cmd or an unknown A_FTRACE fcn, EINTR when
 * interrupted while waiting for another thread's shutdown-class command,
 * ENOTSUP when the cpr module cannot be loaded, or whatever exitlwps() or
 * cpr() returned.
 */
int
kadmin(int cmd, int fcn, void *mdep, cred_t *credp)
{
	int error = 0;
	char *buf;
	size_t buflen = 0;
	boolean_t invoke_cb = B_FALSE;

	/*
	 * We might be called directly by the kernel's fault-handling code, so
	 * we can't assert that the caller is in the global zone.
	 */

	/*
	 * Make sure that cmd is one of the valid <sys/uadmin.h> command codes
	 * and that we have appropriate privileges for this action.
	 */
	switch (cmd) {
	case A_FTRACE:
	case A_SHUTDOWN:
	case A_REBOOT:
	case A_REMOUNT:
	case A_FREEZE:
	case A_DUMP:
	case A_SDTTEST:
	case A_CONFIG:
		if (secpolicy_sys_config(credp, B_FALSE) != 0)
			return (EPERM);
		break;

	default:
		return (EINVAL);
	}

	/*
	 * Serialize these operations on ualock.  If it is held, the
	 * system should shutdown, reboot, or remount shortly, unless there is
	 * an error.  We need a cv rather than just a mutex because proper
	 * functioning of A_REBOOT relies on being able to interrupt blocked
	 * userland callers.
	 *
	 * We only clear ua_shutdown_thread after A_REMOUNT or A_CONFIG.
	 * Other commands should never return.
	 */
	if (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_REMOUNT ||
	    cmd == A_CONFIG) {
		mutex_enter(&ualock);
		while (ua_shutdown_thread != NULL) {
			if (cv_wait_sig(&uacond, &ualock) == 0) {
				/*
				 * If we were interrupted, leave, and handle
				 * the signal (or exit, depending on what
				 * happened)
				 */
				mutex_exit(&ualock);
				return (EINTR);
			}
		}
		/* We now own the shutdown path until we clear this again. */
		ua_shutdown_thread = curthread;
		mutex_exit(&ualock);
	}

	switch (cmd) {
	case A_SHUTDOWN:
	{
		proc_t *p = ttoproc(curthread);

		/*
		 * Release (almost) all of our own resources if we are called
		 * from a user context, however if we are calling kadmin() from
		 * a kernel context then we do not release these resources.
		 */
		if (p != &p0) {
			proc_is_exiting(p);
			if ((error = exitlwps(0)) != 0) {
				/*
				 * Another thread in this process also called
				 * exitlwps().
				 */
				mutex_enter(&ualock);
				ua_shutdown_thread = NULL;
				cv_signal(&uacond);
				mutex_exit(&ualock);
				return (error);
			}
			mutex_enter(&p->p_lock);
			p->p_flag |= SNOWAIT;
			sigfillset(&p->p_ignore);
			curthread->t_lwp->lwp_cursig = 0;
			curthread->t_lwp->lwp_extsig = 0;
			/*
			 * Clearing p_exec also makes killall() below skip
			 * this process, since it passes over any process
			 * whose p_exec is NULLVP. The vnode hold is dropped
			 * only after p_lock has been released.
			 */
			if (p->p_exec) {
				vnode_t *exec_vp = p->p_exec;
				p->p_exec = NULLVP;
				mutex_exit(&p->p_lock);
				VN_RELE(exec_vp);
			} else {
				mutex_exit(&p->p_lock);
			}

			pollcleanup();
			closeall(P_FINFO(curproc));
			relvm();

		} else {
			/*
			 * Reset t_cred if not set because much of the
			 * filesystem code depends on CRED() being valid.
			 */
			if (curthread->t_cred == NULL)
				curthread->t_cred = kcred;
		}

		/* indicate shutdown in progress */
		sys_shutdown = 1;

		/*
		 * Communicate that init shouldn't be restarted.
		 */
		zone_shutdown_global();

		killall(ALL_ZONES, B_FALSE);
		/*
		 * If we are calling kadmin() from a kernel context then we
		 * do not release these resources.
		 */
		if (ttoproc(curthread) != &p0) {
			VN_RELE(PTOU(curproc)->u_cdir);
			if (PTOU(curproc)->u_rdir)
				VN_RELE(PTOU(curproc)->u_rdir);
			if (PTOU(curproc)->u_cwd)
				refstr_rele(PTOU(curproc)->u_cwd);

			PTOU(curproc)->u_cdir = rootdir;
			PTOU(curproc)->u_rdir = NULL;
			PTOU(curproc)->u_cwd = NULL;
		}

		/*
		 * Allow the reboot/halt/poweroff code a chance to do
		 * anything it needs to whilst we still have filesystems
		 * mounted, like loading any modules necessary for later
		 * performing the actual poweroff.
		 */
		if ((mdep != NULL) && (*(char *)mdep == '/')) {
			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
			mdpreboot(cmd, fcn, buf);
		} else
			mdpreboot(cmd, fcn, mdep);

		/*
		 * Allow fsflush to finish running and then prevent it
		 * from ever running again so that vfs_unmountall() and
		 * vfs_syncall() can acquire the vfs locks they need.
		 */
		sema_p(&fsflush_sema);
		(void) callb_execute_class(CB_CL_UADMIN_PRE_VFS, NULL);

		vfs_unmountall();
		(void) VFS_MOUNTROOT(rootvfs, ROOT_UNMOUNT);
		vfs_syncall();

		dump_ereports();
		dump_messages();

		/*
		 * Only the A_SHUTDOWN path reaches mdboot() with invoke_cb
		 * set; a direct A_REBOOT passes B_FALSE.
		 */
		invoke_cb = B_TRUE;

		/* FALLTHROUGH */
	}

	case A_REBOOT:
		/*
		 * cmd is passed through unchanged, so mdboot() sees
		 * A_SHUTDOWN when we arrived here by falling through.
		 */
		if ((mdep != NULL) && (*(char *)mdep == '/')) {
			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
			mdboot(cmd, fcn, buf, invoke_cb);
		} else
			mdboot(cmd, fcn, mdep, invoke_cb);
		/* no return expected */
		break;

	case A_CONFIG:
		/*
		 * There is no default case: an unrecognized fcn is accepted
		 * and returns 0. On sparc, AD_UPDATE_BOOT_CONFIG does nothing.
		 */
		switch (fcn) {
		case AD_UPDATE_BOOT_CONFIG:
#ifndef	__sparc
		{
			extern void fastboot_update_config(const char *);

			fastboot_update_config(mdep);
		}
#endif

			break;
		}
		/* Let other threads enter the shutdown path now */
		mutex_enter(&ualock);
		ua_shutdown_thread = NULL;
		cv_signal(&uacond);
		mutex_exit(&ualock);
		break;

	case A_REMOUNT:
		(void) VFS_MOUNTROOT(rootvfs, ROOT_REMOUNT);
		/* Let other threads enter the shutdown path now */
		mutex_enter(&ualock);
		ua_shutdown_thread = NULL;
		cv_signal(&uacond);
		mutex_exit(&ualock);
		break;

	case A_FREEZE:
	{
		/*
		 * This is the entrypoint for all suspend/resume actions.
		 */
		extern int cpr(int, void *);

		if (modload("misc", "cpr") == -1)
			return (ENOTSUP);
		/* Let the CPR module decide what to do with mdep */
		error = cpr(fcn, mdep);
		break;
	}

	case A_FTRACE:
	{
		switch (fcn) {
		case AD_FTRACE_START:
			(void) FTRACE_START();
			break;
		case AD_FTRACE_STOP:
			(void) FTRACE_STOP();
			break;
		default:
			error = EINVAL;
		}
		break;
	}

	case A_DUMP:
	{
		/* AD_NOSYNC only sets in_sync and returns 0; it does not panic. */
		if (fcn == AD_NOSYNC) {
			in_sync = 1;
			break;
		}

		panic_bootfcn = fcn;
		panic_forced = 1;

		if ((mdep != NULL) && (*(char *)mdep == '/')) {
			panic_bootstr = i_convert_boot_device_name(mdep,
			    NULL, &buflen);
		} else
			panic_bootstr = mdep;

#ifndef	__sparc
		extern void fastboot_update_and_load(int, char *);

		/* Receives the original mdep, not the converted panic_bootstr. */
		fastboot_update_and_load(fcn, mdep);
#endif

		panic("forced crash dump initiated at user request");
		/*NOTREACHED*/
	}

	case A_SDTTEST:
	{
		/* Fires a seven-argument probe with the constants 1..7. */
		DTRACE_PROBE7(test, int, 1, int, 2, int, 3, int, 4, int, 5,
		    int, 6, int, 7);
		break;
	}

	default:
		error = EINVAL;
	}

	return (error);
}
402
403 int
404 uadmin(int cmd, int fcn, uintptr_t mdep)
405 {
406 int error = 0, rv = 0;
407 size_t nbytes = 0;
408 cred_t *credp = CRED();
409 char *bootargs = NULL;
410 int reset_status = 0;
411
412 if (cmd == A_SHUTDOWN && fcn == AD_FASTREBOOT_DRYRUN) {
413 ddi_walk_devs(ddi_root_node(), check_driver_quiesce,
414 &reset_status);
415 if (reset_status != 0)
416 return (EIO);
417 else
418 return (0);
419 }
420
421 /*
422 * The swapctl system call doesn't have its own entry point: it uses
423 * uadmin as a wrapper so we just call it directly from here.
424 */
425 if (cmd == A_SWAPCTL) {
426 if (get_udatamodel() == DATAMODEL_NATIVE)
427 error = swapctl(fcn, (void *)mdep, &rv);
428 #if defined(_SYSCALL32_IMPL)
429 else
430 error = swapctl32(fcn, (void *)mdep, &rv);
431 #endif /* _SYSCALL32_IMPL */
432 return (error ? set_errno(error) : rv);
433 }
434
435 /*
436 * Certain subcommands intepret a non-NULL mdep value as a pointer to
437 * a boot string. We pull that in as bootargs, if applicable.
438 */
439 if (mdep != NULL &&
440 (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_DUMP ||
441 cmd == A_FREEZE || cmd == A_CONFIG)) {
442 bootargs = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP);
443 if ((error = copyinstr((const char *)mdep, bootargs,
444 BOOTARGS_MAX, &nbytes)) != 0) {
445 kmem_free(bootargs, BOOTARGS_MAX);
446 return (set_errno(error));
447 }
448 }
449
450 /*
451 * Invoke the appropriate kadmin() routine.
452 */
453 if (getzoneid() != GLOBAL_ZONEID)
454 error = zone_kadmin(cmd, fcn, bootargs, credp);
455 else
456 error = kadmin(cmd, fcn, bootargs, credp);
457
458 if (bootargs != NULL)
459 kmem_free(bootargs, BOOTARGS_MAX);
460 return (error ? set_errno(error) : 0);
461 }