1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/proc.h> 27 #include <sys/systm.h> 28 #include <sys/param.h> 29 #include <sys/kmem.h> 30 #include <sys/sysmacros.h> 31 #include <sys/types.h> 32 #include <sys/cmn_err.h> 33 #include <sys/user.h> 34 #include <sys/cred.h> 35 #include <sys/vnode.h> 36 #include <sys/file.h> 37 #include <sys/pathname.h> 38 #include <sys/modctl.h> 39 #include <sys/acctctl.h> 40 #include <sys/bitmap.h> 41 #include <sys/exacct.h> 42 #include <sys/policy.h> 43 44 /* 45 * acctctl(2) 46 * 47 * acctctl() provides the administrative interface to the extended accounting 48 * subsystem. The process and task accounting facilities are configurable: 49 * resources can be individually specified for recording in the appropriate 50 * accounting file. 51 * 52 * The current implementation of acctctl() requires that the process and task 53 * and flow files be distinct across all zones. 54 * 55 * Locking 56 * Each accounting species has an ac_info_t which contains a mutex, 57 * used to protect the ac_info_t's contents, and to serialize access to the 58 * appropriate file. 59 */ 60 61 static list_t exacct_globals_list; 62 static kmutex_t exacct_globals_list_lock; 63 64 static int 65 ac_state_set(ac_info_t *info, void *buf, size_t bufsz) 66 { 67 int state; 68 69 if (buf == NULL || (bufsz != sizeof (int))) 70 return (EINVAL); 71 72 if (copyin(buf, &state, bufsz) != 0) 73 return (EFAULT); 74 75 if (state != AC_ON && state != AC_OFF) 76 return (EINVAL); 77 78 mutex_enter(&info->ac_lock); 79 info->ac_state = state; 80 mutex_exit(&info->ac_lock); 81 return (0); 82 } 83 84 static int 85 ac_state_get(ac_info_t *info, void *buf, size_t bufsz) 86 { 87 if (buf == NULL || (bufsz != sizeof (int))) 88 return (EINVAL); 89 90 mutex_enter(&info->ac_lock); 91 if (copyout(&info->ac_state, buf, bufsz) != 0) { 92 mutex_exit(&info->ac_lock); 93 return (EFAULT); 94 } 95 mutex_exit(&info->ac_lock); 96 return (0); 97 } 98 99 static boolean_t 100 ac_file_in_use(vnode_t *vp) 101 { 102 boolean_t in_use = B_FALSE; 103 struct exacct_globals *acg; 104 105 if (vp == NULL) 106 return (B_FALSE); 107 mutex_enter(&exacct_globals_list_lock); 108 /* 109 * Start off by grabbing all locks. 110 */ 111 for (acg = list_head(&exacct_globals_list); acg != NULL; 112 acg = list_next(&exacct_globals_list, acg)) { 113 mutex_enter(&acg->ac_proc.ac_lock); 114 mutex_enter(&acg->ac_task.ac_lock); 115 mutex_enter(&acg->ac_flow.ac_lock); 116 mutex_enter(&acg->ac_net.ac_lock); 117 } 118 119 for (acg = list_head(&exacct_globals_list); !in_use && acg != NULL; 120 acg = list_next(&exacct_globals_list, acg)) { 121 /* 122 * We need to verify that we aren't already using this file for 123 * accounting in any zone. 124 */ 125 if (vn_compare(acg->ac_proc.ac_vnode, vp) || 126 vn_compare(acg->ac_task.ac_vnode, vp) || 127 vn_compare(acg->ac_flow.ac_vnode, vp) || 128 vn_compare(acg->ac_net.ac_vnode, vp)) 129 in_use = B_TRUE; 130 } 131 132 /* 133 * Drop all locks. 134 */ 135 for (acg = list_head(&exacct_globals_list); acg != NULL; 136 acg = list_next(&exacct_globals_list, acg)) { 137 mutex_exit(&acg->ac_proc.ac_lock); 138 mutex_exit(&acg->ac_task.ac_lock); 139 mutex_exit(&acg->ac_flow.ac_lock); 140 mutex_exit(&acg->ac_net.ac_lock); 141 } 142 mutex_exit(&exacct_globals_list_lock); 143 return (in_use); 144 } 145 146 static int 147 ac_file_set(ac_info_t *info, void *ubuf, size_t bufsz) 148 { 149 int error = 0; 150 void *kbuf; 151 void *namebuf; 152 int namelen; 153 vnode_t *vp; 154 void *hdr; 155 size_t hdrsize; 156 vattr_t va; 157 158 if (ubuf == NULL) { 159 mutex_enter(&info->ac_lock); 160 161 /* 162 * Closing accounting file 163 */ 164 if (info->ac_vnode != NULL) { 165 error = VOP_CLOSE(info->ac_vnode, FWRITE, 1, 0, 166 CRED(), NULL); 167 if (error) { 168 mutex_exit(&info->ac_lock); 169 return (error); 170 } 171 VN_RELE(info->ac_vnode); 172 info->ac_vnode = NULL; 173 } 174 if (info->ac_file != NULL) { 175 kmem_free(info->ac_file, strlen(info->ac_file) + 1); 176 info->ac_file = NULL; 177 } 178 179 mutex_exit(&info->ac_lock); 180 return (error); 181 } 182 183 if (bufsz < 2 || bufsz > MAXPATHLEN) 184 return (EINVAL); 185 186 /* 187 * We have to copy in the whole buffer since we can't tell the length 188 * of the string in user's address space. 189 */ 190 kbuf = kmem_zalloc(bufsz, KM_SLEEP); 191 if ((error = copyinstr((char *)ubuf, (char *)kbuf, bufsz, NULL)) != 0) { 192 kmem_free(kbuf, bufsz); 193 return (error); 194 } 195 if (*((char *)kbuf) != '/') { 196 kmem_free(kbuf, bufsz); 197 return (EINVAL); 198 } 199 200 /* 201 * Now, allocate the space where we are going to save the 202 * name of the accounting file and kmem_free kbuf. We have to do this 203 * now because it is not good to sleep in kmem_alloc() while 204 * holding ac_info's lock. 205 */ 206 namelen = strlen(kbuf) + 1; 207 namebuf = kmem_alloc(namelen, KM_SLEEP); 208 (void) strcpy(namebuf, kbuf); 209 kmem_free(kbuf, bufsz); 210 211 /* 212 * Check if this file already exists. 213 */ 214 error = lookupname(namebuf, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); 215 216 /* 217 * Check if the file is already in use. 218 */ 219 if (!error) { 220 if (ac_file_in_use(vp)) { 221 /* 222 * If we're already using it then return EBUSY 223 */ 224 kmem_free(namebuf, namelen); 225 VN_RELE(vp); 226 return (EBUSY); 227 } 228 VN_RELE(vp); 229 } 230 231 /* 232 * Create an exacct header here because exacct_create_header() may 233 * sleep so we should not be holding ac_lock. At this point we cannot 234 * reliably know if we need the header or not, so we may end up not 235 * using the header. 236 */ 237 hdr = exacct_create_header(&hdrsize); 238 239 /* 240 * Now, grab info's ac_lock and try to set up everything. 241 */ 242 mutex_enter(&info->ac_lock); 243 244 if ((error = vn_open(namebuf, UIO_SYSSPACE, 245 FCREAT | FWRITE | FOFFMAX, 0600, &vp, CRCREAT, 0)) != 0) { 246 mutex_exit(&info->ac_lock); 247 kmem_free(namebuf, namelen); 248 kmem_free(hdr, hdrsize); 249 return (error); 250 } 251 252 if (vp->v_type != VREG) { 253 VN_RELE(vp); 254 mutex_exit(&info->ac_lock); 255 kmem_free(namebuf, namelen); 256 kmem_free(hdr, hdrsize); 257 return (EACCES); 258 } 259 260 if (info->ac_vnode != NULL) { 261 /* 262 * Switch from an old file to a new file by swapping 263 * their vnode pointers. 264 */ 265 vnode_t *oldvp; 266 oldvp = info->ac_vnode; 267 info->ac_vnode = vp; 268 vp = oldvp; 269 } else { 270 /* 271 * Start writing accounting records to a new file. 272 */ 273 info->ac_vnode = vp; 274 vp = NULL; 275 } 276 if (vp) { 277 /* 278 * We still need to close the old file. 279 */ 280 if ((error = VOP_CLOSE(vp, FWRITE, 1, 0, CRED(), NULL)) != 0) { 281 VN_RELE(vp); 282 mutex_exit(&info->ac_lock); 283 kmem_free(namebuf, namelen); 284 kmem_free(hdr, hdrsize); 285 return (error); 286 } 287 VN_RELE(vp); 288 if (info->ac_file != NULL) { 289 kmem_free(info->ac_file, 290 strlen(info->ac_file) + 1); 291 info->ac_file = NULL; 292 } 293 } 294 info->ac_file = namebuf; 295 296 /* 297 * Write the exacct header only if the file is empty. 298 */ 299 error = VOP_GETATTR(info->ac_vnode, &va, AT_SIZE, CRED(), NULL); 300 if (error == 0 && va.va_size == 0) 301 error = exacct_write_header(info, hdr, hdrsize); 302 303 mutex_exit(&info->ac_lock); 304 kmem_free(hdr, hdrsize); 305 return (error); 306 } 307 308 static int 309 ac_file_get(ac_info_t *info, void *buf, size_t bufsz) 310 { 311 int error = 0; 312 vnode_t *vnode; 313 char *file; 314 315 mutex_enter(&info->ac_lock); 316 file = info->ac_file; 317 vnode = info->ac_vnode; 318 319 if (file == NULL || vnode == NULL) { 320 mutex_exit(&info->ac_lock); 321 return (ENOTACTIVE); 322 } 323 324 if (strlen(file) >= bufsz) 325 error = ENOMEM; 326 else 327 error = copyoutstr(file, buf, MAXPATHLEN, NULL); 328 329 mutex_exit(&info->ac_lock); 330 return (error); 331 } 332 333 static int 334 ac_res_set(ac_info_t *info, void *buf, size_t bufsz, int maxres) 335 { 336 ac_res_t *res; 337 ac_res_t *tmp; 338 ulong_t *maskp; 339 int id; 340 uint_t counter = 0; 341 342 /* 343 * Validate that a non-zero buffer, sized within limits and to an 344 * integral number of ac_res_t's has been specified. 345 */ 346 if (bufsz == 0 || 347 bufsz > sizeof (ac_res_t) * (AC_MAX_RES + 1) || 348 (bufsz / sizeof (ac_res_t)) * sizeof (ac_res_t) != bufsz) 349 return (EINVAL); 350 351 tmp = res = kmem_alloc(bufsz, KM_SLEEP); 352 if (copyin(buf, res, bufsz) != 0) { 353 kmem_free(res, bufsz); 354 return (EFAULT); 355 } 356 357 maskp = (ulong_t *)&info->ac_mask; 358 359 mutex_enter(&info->ac_lock); 360 while ((id = tmp->ar_id) != AC_NONE && counter < maxres + 1) { 361 if (id > maxres || id < 0) { 362 mutex_exit(&info->ac_lock); 363 kmem_free(res, bufsz); 364 return (EINVAL); 365 } 366 if (tmp->ar_state == AC_ON) { 367 BT_SET(maskp, id); 368 } else if (tmp->ar_state == AC_OFF) { 369 BT_CLEAR(maskp, id); 370 } else { 371 mutex_exit(&info->ac_lock); 372 kmem_free(res, bufsz); 373 return (EINVAL); 374 } 375 tmp++; 376 counter++; 377 } 378 mutex_exit(&info->ac_lock); 379 kmem_free(res, bufsz); 380 return (0); 381 } 382 383 static int 384 ac_res_get(ac_info_t *info, void *buf, size_t bufsz, int maxres) 385 { 386 int error = 0; 387 ac_res_t *res; 388 ac_res_t *tmp; 389 size_t ressz = sizeof (ac_res_t) * (maxres + 1); 390 ulong_t *maskp; 391 int id; 392 393 if (bufsz < ressz) 394 return (EINVAL); 395 tmp = res = kmem_alloc(ressz, KM_SLEEP); 396 397 mutex_enter(&info->ac_lock); 398 maskp = (ulong_t *)&info->ac_mask; 399 for (id = 1; id <= maxres; id++) { 400 tmp->ar_id = id; 401 tmp->ar_state = BT_TEST(maskp, id); 402 tmp++; 403 } 404 tmp->ar_id = AC_NONE; 405 tmp->ar_state = AC_OFF; 406 mutex_exit(&info->ac_lock); 407 error = copyout(res, buf, ressz); 408 kmem_free(res, ressz); 409 return (error); 410 } 411 412 /* 413 * acctctl() 414 * 415 * Overview 416 * acctctl() is the entry point for the acctctl(2) system call. 417 * 418 * Return values 419 * On successful completion, return 0; otherwise -1 is returned and errno is 420 * set appropriately. 421 * 422 * Caller's context 423 * Called from the system call path. 424 */ 425 int 426 acctctl(int cmd, void *buf, size_t bufsz) 427 { 428 int error = 0; 429 int mode = AC_MODE(cmd); 430 int option = AC_OPTION(cmd); 431 int maxres; 432 ac_info_t *info; 433 zone_t *zone = curproc->p_zone; 434 struct exacct_globals *acg; 435 436 acg = zone_getspecific(exacct_zone_key, zone); 437 /* 438 * exacct_zone_key and associated per-zone state were initialized when 439 * the module was loaded. 440 */ 441 ASSERT(exacct_zone_key != ZONE_KEY_UNINITIALIZED); 442 ASSERT(acg != NULL); 443 444 switch (mode) { /* sanity check */ 445 case AC_TASK: 446 info = &acg->ac_task; 447 maxres = AC_TASK_MAX_RES; 448 break; 449 case AC_PROC: 450 info = &acg->ac_proc; 451 maxres = AC_PROC_MAX_RES; 452 break; 453 /* 454 * Flow/net accounting isn't configurable in non-global 455 * zones, but we have this field on a per-zone basis for future 456 * expansion as well as the ability to return default "unset" 457 * values for the various AC_*_GET queries. AC_*_SET commands 458 * fail with EPERM for AC_FLOW and AC_NET in non-global zones. 459 */ 460 case AC_FLOW: 461 info = &acg->ac_flow; 462 maxres = AC_FLOW_MAX_RES; 463 break; 464 case AC_NET: 465 info = &acg->ac_net; 466 maxres = AC_NET_MAX_RES; 467 break; 468 default: 469 return (set_errno(EINVAL)); 470 } 471 472 switch (option) { 473 case AC_STATE_SET: 474 if ((error = secpolicy_acct(CRED())) != 0) 475 break; 476 if ((mode == AC_FLOW || mode == AC_NET) && 477 getzoneid() != GLOBAL_ZONEID) { 478 error = EPERM; 479 break; 480 } 481 error = ac_state_set(info, buf, bufsz); 482 break; 483 case AC_STATE_GET: 484 error = ac_state_get(info, buf, bufsz); 485 break; 486 case AC_FILE_SET: 487 if ((error = secpolicy_acct(CRED())) != 0) 488 break; 489 if ((mode == AC_FLOW || mode == AC_NET) && 490 getzoneid() != GLOBAL_ZONEID) { 491 error = EPERM; 492 break; 493 } 494 error = ac_file_set(info, buf, bufsz); 495 break; 496 case AC_FILE_GET: 497 error = ac_file_get(info, buf, bufsz); 498 break; 499 case AC_RES_SET: 500 if ((error = secpolicy_acct(CRED())) != 0) 501 break; 502 if ((mode == AC_FLOW || mode == AC_NET) && 503 getzoneid() != GLOBAL_ZONEID) { 504 error = EPERM; 505 break; 506 } 507 error = ac_res_set(info, buf, bufsz, maxres); 508 break; 509 case AC_RES_GET: 510 error = ac_res_get(info, buf, bufsz, maxres); 511 break; 512 default: 513 return (set_errno(EINVAL)); 514 } 515 if (error) 516 return (set_errno(error)); 517 return (0); 518 } 519 520 static struct sysent ac_sysent = { 521 3, 522 SE_NOUNLOAD | SE_ARGC | SE_32RVAL1, 523 acctctl 524 }; 525 526 static struct modlsys modlsys = { 527 &mod_syscallops, 528 "acctctl system call", 529 &ac_sysent 530 }; 531 532 #ifdef _SYSCALL32_IMPL 533 static struct modlsys modlsys32 = { 534 &mod_syscallops32, 535 "32-bit acctctl system call", 536 &ac_sysent 537 }; 538 #endif 539 540 static struct modlinkage modlinkage = { 541 MODREV_1, 542 { &modlsys, 543 #ifdef _SYSCALL32_IMPL 544 &modlsys32, 545 #endif 546 NULL 547 } 548 }; 549 550 /* ARGSUSED */ 551 static void * 552 exacct_zone_init(zoneid_t zoneid) 553 { 554 struct exacct_globals *acg; 555 556 acg = kmem_zalloc(sizeof (*acg), KM_SLEEP); 557 mutex_enter(&exacct_globals_list_lock); 558 list_insert_tail(&exacct_globals_list, acg); 559 mutex_exit(&exacct_globals_list_lock); 560 return (acg); 561 } 562 563 static void 564 exacct_free_info(ac_info_t *info) 565 { 566 mutex_enter(&info->ac_lock); 567 if (info->ac_vnode) { 568 (void) VOP_CLOSE(info->ac_vnode, FWRITE, 1, 0, kcred, NULL); 569 VN_RELE(info->ac_vnode); 570 kmem_free(info->ac_file, strlen(info->ac_file) + 1); 571 } 572 info->ac_state = AC_OFF; 573 info->ac_vnode = NULL; 574 info->ac_file = NULL; 575 mutex_exit(&info->ac_lock); 576 } 577 578 /* ARGSUSED */ 579 static void 580 exacct_zone_shutdown(zoneid_t zoneid, void *data) 581 { 582 struct exacct_globals *acg = data; 583 584 /* 585 * The accounting files need to be closed during shutdown rather than 586 * destroy, since otherwise the filesystem they reside on may fail to 587 * unmount, thus causing the entire zone halt/reboot to fail. 588 */ 589 exacct_free_info(&acg->ac_proc); 590 exacct_free_info(&acg->ac_task); 591 exacct_free_info(&acg->ac_flow); 592 exacct_free_info(&acg->ac_net); 593 } 594 595 /* ARGSUSED */ 596 static void 597 exacct_zone_fini(zoneid_t zoneid, void *data) 598 { 599 struct exacct_globals *acg = data; 600 601 mutex_enter(&exacct_globals_list_lock); 602 list_remove(&exacct_globals_list, acg); 603 mutex_exit(&exacct_globals_list_lock); 604 605 mutex_destroy(&acg->ac_proc.ac_lock); 606 mutex_destroy(&acg->ac_task.ac_lock); 607 mutex_destroy(&acg->ac_flow.ac_lock); 608 mutex_destroy(&acg->ac_net.ac_lock); 609 kmem_free(acg, sizeof (*acg)); 610 } 611 612 int 613 _init() 614 { 615 int error; 616 617 mutex_init(&exacct_globals_list_lock, NULL, MUTEX_DEFAULT, NULL); 618 list_create(&exacct_globals_list, sizeof (struct exacct_globals), 619 offsetof(struct exacct_globals, ac_link)); 620 zone_key_create(&exacct_zone_key, exacct_zone_init, 621 exacct_zone_shutdown, exacct_zone_fini); 622 623 if ((error = mod_install(&modlinkage)) != 0) { 624 (void) zone_key_delete(exacct_zone_key); 625 exacct_zone_key = ZONE_KEY_UNINITIALIZED; 626 mutex_destroy(&exacct_globals_list_lock); 627 list_destroy(&exacct_globals_list); 628 } 629 return (error); 630 } 631 632 int 633 _info(struct modinfo *modinfop) 634 { 635 return (mod_info(&modlinkage, modinfop)); 636 } 637 638 int 639 _fini() 640 { 641 return (EBUSY); 642 }