1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/proc.h>
  27 #include <sys/systm.h>
  28 #include <sys/param.h>
  29 #include <sys/kmem.h>
  30 #include <sys/sysmacros.h>
  31 #include <sys/types.h>
  32 #include <sys/cmn_err.h>
  33 #include <sys/user.h>
  34 #include <sys/cred.h>
  35 #include <sys/vnode.h>
  36 #include <sys/file.h>
  37 #include <sys/pathname.h>
  38 #include <sys/modctl.h>
  39 #include <sys/acctctl.h>
  40 #include <sys/bitmap.h>
  41 #include <sys/exacct.h>
  42 #include <sys/policy.h>
  43 
  44 /*
  45  * acctctl(2)
  46  *
  47  *   acctctl() provides the administrative interface to the extended accounting
  48  *   subsystem.  The process and task accounting facilities are configurable:
  49  *   resources can be individually specified for recording in the appropriate
  50  *   accounting file.
  51  *
 *   The current implementation of acctctl() requires that the process, task,
 *   flow and net accounting files be distinct across all zones.
  54  *
  55  * Locking
  56  *   Each accounting species has an ac_info_t which contains a mutex,
  57  *   used to protect the ac_info_t's contents, and to serialize access to the
  58  *   appropriate file.
  59  */
  60 
  61 static list_t exacct_globals_list;
  62 static kmutex_t exacct_globals_list_lock;
  63 
  64 static int
  65 ac_state_set(ac_info_t *info, void *buf, size_t bufsz)
  66 {
  67         int state;
  68 
  69         if (buf == NULL || (bufsz != sizeof (int)))
  70                 return (EINVAL);
  71 
  72         if (copyin(buf, &state, bufsz) != 0)
  73                 return (EFAULT);
  74 
  75         if (state != AC_ON && state != AC_OFF)
  76                 return (EINVAL);
  77 
  78         mutex_enter(&info->ac_lock);
  79         info->ac_state = state;
  80         mutex_exit(&info->ac_lock);
  81         return (0);
  82 }
  83 
  84 static int
  85 ac_state_get(ac_info_t *info, void *buf, size_t bufsz)
  86 {
  87         if (buf == NULL || (bufsz != sizeof (int)))
  88                 return (EINVAL);
  89 
  90         mutex_enter(&info->ac_lock);
  91         if (copyout(&info->ac_state, buf, bufsz) != 0) {
  92                 mutex_exit(&info->ac_lock);
  93                 return (EFAULT);
  94         }
  95         mutex_exit(&info->ac_lock);
  96         return (0);
  97 }
  98 
  99 static boolean_t
 100 ac_file_in_use(vnode_t *vp)
 101 {
 102         boolean_t in_use = B_FALSE;
 103         struct exacct_globals *acg;
 104 
 105         if (vp == NULL)
 106                 return (B_FALSE);
 107         mutex_enter(&exacct_globals_list_lock);
 108         /*
 109          * Start off by grabbing all locks.
 110          */
 111         for (acg = list_head(&exacct_globals_list); acg != NULL;
 112             acg = list_next(&exacct_globals_list, acg)) {
 113                 mutex_enter(&acg->ac_proc.ac_lock);
 114                 mutex_enter(&acg->ac_task.ac_lock);
 115                 mutex_enter(&acg->ac_flow.ac_lock);
 116                 mutex_enter(&acg->ac_net.ac_lock);
 117         }
 118 
 119         for (acg = list_head(&exacct_globals_list); !in_use && acg != NULL;
 120             acg = list_next(&exacct_globals_list, acg)) {
 121                 /*
 122                  * We need to verify that we aren't already using this file for
 123                  * accounting in any zone.
 124                  */
 125                 if (vn_compare(acg->ac_proc.ac_vnode, vp) ||
 126                     vn_compare(acg->ac_task.ac_vnode, vp) ||
 127                     vn_compare(acg->ac_flow.ac_vnode, vp) ||
 128                     vn_compare(acg->ac_net.ac_vnode, vp))
 129                         in_use = B_TRUE;
 130         }
 131 
 132         /*
 133          * Drop all locks.
 134          */
 135         for (acg = list_head(&exacct_globals_list); acg != NULL;
 136             acg = list_next(&exacct_globals_list, acg)) {
 137                 mutex_exit(&acg->ac_proc.ac_lock);
 138                 mutex_exit(&acg->ac_task.ac_lock);
 139                 mutex_exit(&acg->ac_flow.ac_lock);
 140                 mutex_exit(&acg->ac_net.ac_lock);
 141         }
 142         mutex_exit(&exacct_globals_list_lock);
 143         return (in_use);
 144 }
 145 
 146 static int
 147 ac_file_set(ac_info_t *info, void *ubuf, size_t bufsz)
 148 {
 149         int error = 0;
 150         void *kbuf;
 151         void *namebuf;
 152         int namelen;
 153         vnode_t *vp;
 154         void *hdr;
 155         size_t hdrsize;
 156         vattr_t va;
 157 
 158         if (ubuf == NULL) {
 159                 mutex_enter(&info->ac_lock);
 160 
 161                 /*
 162                  * Closing accounting file
 163                  */
 164                 if (info->ac_vnode != NULL) {
 165                         error = VOP_CLOSE(info->ac_vnode, FWRITE, 1, 0,
 166                             CRED(), NULL);
 167                         if (error) {
 168                                 mutex_exit(&info->ac_lock);
 169                                 return (error);
 170                         }
 171                         VN_RELE(info->ac_vnode);
 172                         info->ac_vnode = NULL;
 173                 }
 174                 if (info->ac_file != NULL) {
 175                         kmem_free(info->ac_file, strlen(info->ac_file) + 1);
 176                         info->ac_file = NULL;
 177                 }
 178 
 179                 mutex_exit(&info->ac_lock);
 180                 return (error);
 181         }
 182 
 183         if (bufsz < 2 || bufsz > MAXPATHLEN)
 184                 return (EINVAL);
 185 
 186         /*
 187          * We have to copy in the whole buffer since we can't tell the length
 188          * of the string in user's address space.
 189          */
 190         kbuf = kmem_zalloc(bufsz, KM_SLEEP);
 191         if ((error = copyinstr((char *)ubuf, (char *)kbuf, bufsz, NULL)) != 0) {
 192                 kmem_free(kbuf, bufsz);
 193                 return (error);
 194         }
 195         if (*((char *)kbuf) != '/') {
 196                 kmem_free(kbuf, bufsz);
 197                 return (EINVAL);
 198         }
 199 
 200         /*
 201          * Now, allocate the space where we are going to save the
 202          * name of the accounting file and kmem_free kbuf. We have to do this
 203          * now because it is not good to sleep in kmem_alloc() while
 204          * holding ac_info's lock.
 205          */
 206         namelen = strlen(kbuf) + 1;
 207         namebuf = kmem_alloc(namelen, KM_SLEEP);
 208         (void) strcpy(namebuf, kbuf);
 209         kmem_free(kbuf, bufsz);
 210 
 211         /*
 212          * Check if this file already exists.
 213          */
 214         error = lookupname(namebuf, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
 215 
 216         /*
 217          * Check if the file is already in use.
 218          */
 219         if (!error) {
 220                 if (ac_file_in_use(vp)) {
 221                         /*
 222                          * If we're already using it then return EBUSY
 223                          */
 224                         kmem_free(namebuf, namelen);
 225                         VN_RELE(vp);
 226                         return (EBUSY);
 227                 }
 228                 VN_RELE(vp);
 229         }
 230 
 231         /*
 232          * Create an exacct header here because exacct_create_header() may
 233          * sleep so we should not be holding ac_lock. At this point we cannot
 234          * reliably know if we need the header or not, so we may end up not
 235          * using the header.
 236          */
 237         hdr = exacct_create_header(&hdrsize);
 238 
 239         /*
 240          * Now, grab info's ac_lock and try to set up everything.
 241          */
 242         mutex_enter(&info->ac_lock);
 243 
 244         if ((error = vn_open(namebuf, UIO_SYSSPACE,
 245             FCREAT | FWRITE | FOFFMAX, 0600, &vp, CRCREAT, 0)) != 0) {
 246                 mutex_exit(&info->ac_lock);
 247                 kmem_free(namebuf, namelen);
 248                 kmem_free(hdr, hdrsize);
 249                 return (error);
 250         }
 251 
 252         if (vp->v_type != VREG) {
 253                 VN_RELE(vp);
 254                 mutex_exit(&info->ac_lock);
 255                 kmem_free(namebuf, namelen);
 256                 kmem_free(hdr, hdrsize);
 257                 return (EACCES);
 258         }
 259 
 260         if (info->ac_vnode != NULL) {
 261                 /*
 262                  * Switch from an old file to a new file by swapping
 263                  * their vnode pointers.
 264                  */
 265                 vnode_t *oldvp;
 266                 oldvp = info->ac_vnode;
 267                 info->ac_vnode = vp;
 268                 vp = oldvp;
 269         } else {
 270                 /*
 271                  * Start writing accounting records to a new file.
 272                  */
 273                 info->ac_vnode = vp;
 274                 vp = NULL;
 275         }
 276         if (vp) {
 277                 /*
 278                  * We still need to close the old file.
 279                  */
 280                 if ((error = VOP_CLOSE(vp, FWRITE, 1, 0, CRED(), NULL)) != 0) {
 281                         VN_RELE(vp);
 282                         mutex_exit(&info->ac_lock);
 283                         kmem_free(namebuf, namelen);
 284                         kmem_free(hdr, hdrsize);
 285                         return (error);
 286                 }
 287                 VN_RELE(vp);
 288                 if (info->ac_file != NULL) {
 289                         kmem_free(info->ac_file,
 290                             strlen(info->ac_file) + 1);
 291                         info->ac_file = NULL;
 292                 }
 293         }
 294         info->ac_file = namebuf;
 295 
 296         /*
 297          * Write the exacct header only if the file is empty.
 298          */
 299         error = VOP_GETATTR(info->ac_vnode, &va, AT_SIZE, CRED(), NULL);
 300         if (error == 0 && va.va_size == 0)
 301                 error = exacct_write_header(info, hdr, hdrsize);
 302 
 303         mutex_exit(&info->ac_lock);
 304         kmem_free(hdr, hdrsize);
 305         return (error);
 306 }
 307 
 308 static int
 309 ac_file_get(ac_info_t *info, void *buf, size_t bufsz)
 310 {
 311         int error = 0;
 312         vnode_t *vnode;
 313         char *file;
 314 
 315         mutex_enter(&info->ac_lock);
 316         file = info->ac_file;
 317         vnode = info->ac_vnode;
 318 
 319         if (file == NULL || vnode == NULL) {
 320                 mutex_exit(&info->ac_lock);
 321                 return (ENOTACTIVE);
 322         }
 323 
 324         if (strlen(file) >= bufsz)
 325                 error = ENOMEM;
 326         else
 327                 error = copyoutstr(file, buf, MAXPATHLEN, NULL);
 328 
 329         mutex_exit(&info->ac_lock);
 330         return (error);
 331 }
 332 
 333 static int
 334 ac_res_set(ac_info_t *info, void *buf, size_t bufsz, int maxres)
 335 {
 336         ac_res_t *res;
 337         ac_res_t *tmp;
 338         ulong_t *maskp;
 339         int id;
 340         uint_t counter = 0;
 341 
 342         /*
 343          * Validate that a non-zero buffer, sized within limits and to an
 344          * integral number of ac_res_t's has been specified.
 345          */
 346         if (bufsz == 0 ||
 347             bufsz > sizeof (ac_res_t) * (AC_MAX_RES + 1) ||
 348             (bufsz / sizeof (ac_res_t)) * sizeof (ac_res_t) != bufsz)
 349                 return (EINVAL);
 350 
 351         tmp = res = kmem_alloc(bufsz, KM_SLEEP);
 352         if (copyin(buf, res, bufsz) != 0) {
 353                 kmem_free(res, bufsz);
 354                 return (EFAULT);
 355         }
 356 
 357         maskp = (ulong_t *)&info->ac_mask;
 358 
 359         mutex_enter(&info->ac_lock);
 360         while ((id = tmp->ar_id) != AC_NONE && counter < maxres + 1) {
 361                 if (id > maxres || id < 0) {
 362                         mutex_exit(&info->ac_lock);
 363                         kmem_free(res, bufsz);
 364                         return (EINVAL);
 365                 }
 366                 if (tmp->ar_state == AC_ON) {
 367                         BT_SET(maskp, id);
 368                 } else if (tmp->ar_state == AC_OFF) {
 369                         BT_CLEAR(maskp, id);
 370                 } else {
 371                         mutex_exit(&info->ac_lock);
 372                         kmem_free(res, bufsz);
 373                         return (EINVAL);
 374                 }
 375                 tmp++;
 376                 counter++;
 377         }
 378         mutex_exit(&info->ac_lock);
 379         kmem_free(res, bufsz);
 380         return (0);
 381 }
 382 
 383 static int
 384 ac_res_get(ac_info_t *info, void *buf, size_t bufsz, int maxres)
 385 {
 386         int error = 0;
 387         ac_res_t *res;
 388         ac_res_t *tmp;
 389         size_t ressz = sizeof (ac_res_t) * (maxres + 1);
 390         ulong_t *maskp;
 391         int id;
 392 
 393         if (bufsz < ressz)
 394                 return (EINVAL);
 395         tmp = res = kmem_alloc(ressz, KM_SLEEP);
 396 
 397         mutex_enter(&info->ac_lock);
 398         maskp = (ulong_t *)&info->ac_mask;
 399         for (id = 1; id <= maxres; id++) {
 400                 tmp->ar_id = id;
 401                 tmp->ar_state = BT_TEST(maskp, id);
 402                 tmp++;
 403         }
 404         tmp->ar_id = AC_NONE;
 405         tmp->ar_state = AC_OFF;
 406         mutex_exit(&info->ac_lock);
 407         error = copyout(res, buf, ressz);
 408         kmem_free(res, ressz);
 409         return (error);
 410 }
 411 
 412 /*
 413  * acctctl()
 414  *
 415  * Overview
 416  *   acctctl() is the entry point for the acctctl(2) system call.
 417  *
 418  * Return values
 419  *   On successful completion, return 0; otherwise -1 is returned and errno is
 420  *   set appropriately.
 421  *
 422  * Caller's context
 423  *   Called from the system call path.
 424  */
 425 int
 426 acctctl(int cmd, void *buf, size_t bufsz)
 427 {
 428         int error = 0;
 429         int mode = AC_MODE(cmd);
 430         int option = AC_OPTION(cmd);
 431         int maxres;
 432         ac_info_t *info;
 433         zone_t *zone = curproc->p_zone;
 434         struct exacct_globals *acg;
 435 
 436         acg = zone_getspecific(exacct_zone_key, zone);
 437         /*
 438          * exacct_zone_key and associated per-zone state were initialized when
 439          * the module was loaded.
 440          */
 441         ASSERT(exacct_zone_key != ZONE_KEY_UNINITIALIZED);
 442         ASSERT(acg != NULL);
 443 
 444         switch (mode) { /* sanity check */
 445         case AC_TASK:
 446                 info = &acg->ac_task;
 447                 maxres = AC_TASK_MAX_RES;
 448                 break;
 449         case AC_PROC:
 450                 info = &acg->ac_proc;
 451                 maxres = AC_PROC_MAX_RES;
 452                 break;
 453         /*
 454          * Flow/net accounting isn't configurable in non-global
 455          * zones, but we have this field on a per-zone basis for future
 456          * expansion as well as the ability to return default "unset"
 457          * values for the various AC_*_GET queries.  AC_*_SET commands
 458          * fail with EPERM for AC_FLOW and AC_NET in non-global zones.
 459          */
 460         case AC_FLOW:
 461                 info = &acg->ac_flow;
 462                 maxres = AC_FLOW_MAX_RES;
 463                 break;
 464         case AC_NET:
 465                 info = &acg->ac_net;
 466                 maxres = AC_NET_MAX_RES;
 467                 break;
 468         default:
 469                 return (set_errno(EINVAL));
 470         }
 471 
 472         switch (option) {
 473         case AC_STATE_SET:
 474                 if ((error = secpolicy_acct(CRED())) != 0)
 475                         break;
 476                 if ((mode == AC_FLOW || mode == AC_NET) &&
 477                     getzoneid() != GLOBAL_ZONEID) {
 478                         error = EPERM;
 479                         break;
 480                 }
 481                 error = ac_state_set(info, buf, bufsz);
 482                 break;
 483         case AC_STATE_GET:
 484                 error = ac_state_get(info, buf, bufsz);
 485                 break;
 486         case AC_FILE_SET:
 487                 if ((error = secpolicy_acct(CRED())) != 0)
 488                         break;
 489                 if ((mode == AC_FLOW || mode == AC_NET) &&
 490                     getzoneid() != GLOBAL_ZONEID) {
 491                         error = EPERM;
 492                         break;
 493                 }
 494                 error = ac_file_set(info, buf, bufsz);
 495                 break;
 496         case AC_FILE_GET:
 497                 error = ac_file_get(info, buf, bufsz);
 498                 break;
 499         case AC_RES_SET:
 500                 if ((error = secpolicy_acct(CRED())) != 0)
 501                         break;
 502                 if ((mode == AC_FLOW || mode == AC_NET) &&
 503                     getzoneid() != GLOBAL_ZONEID) {
 504                         error = EPERM;
 505                         break;
 506                 }
 507                 error = ac_res_set(info, buf, bufsz, maxres);
 508                 break;
 509         case AC_RES_GET:
 510                 error = ac_res_get(info, buf, bufsz, maxres);
 511                 break;
 512         default:
 513                 return (set_errno(EINVAL));
 514         }
 515         if (error)
 516                 return (set_errno(error));
 517         return (0);
 518 }
 519 
 520 static struct sysent ac_sysent = {
 521         3,
 522         SE_NOUNLOAD | SE_ARGC | SE_32RVAL1,
 523         acctctl
 524 };
 525 
 526 static struct modlsys modlsys = {
 527         &mod_syscallops,
 528         "acctctl system call",
 529         &ac_sysent
 530 };
 531 
 532 #ifdef _SYSCALL32_IMPL
 533 static struct modlsys modlsys32 = {
 534         &mod_syscallops32,
 535         "32-bit acctctl system call",
 536         &ac_sysent
 537 };
 538 #endif
 539 
 540 static struct modlinkage modlinkage = {
 541         MODREV_1,
 542         {   &modlsys,
 543 #ifdef _SYSCALL32_IMPL
 544             &modlsys32,
 545 #endif
 546             NULL
 547         }
 548 };
 549 
 550 /* ARGSUSED */
 551 static void *
 552 exacct_zone_init(zoneid_t zoneid)
 553 {
 554         struct exacct_globals *acg;
 555 
 556         acg = kmem_zalloc(sizeof (*acg), KM_SLEEP);
 557         mutex_enter(&exacct_globals_list_lock);
 558         list_insert_tail(&exacct_globals_list, acg);
 559         mutex_exit(&exacct_globals_list_lock);
 560         return (acg);
 561 }
 562 
 563 static void
 564 exacct_free_info(ac_info_t *info)
 565 {
 566         mutex_enter(&info->ac_lock);
 567         if (info->ac_vnode) {
 568                 (void) VOP_CLOSE(info->ac_vnode, FWRITE, 1, 0, kcred, NULL);
 569                 VN_RELE(info->ac_vnode);
 570                 kmem_free(info->ac_file, strlen(info->ac_file) + 1);
 571         }
 572         info->ac_state = AC_OFF;
 573         info->ac_vnode = NULL;
 574         info->ac_file = NULL;
 575         mutex_exit(&info->ac_lock);
 576 }
 577 
 578 /* ARGSUSED */
 579 static void
 580 exacct_zone_shutdown(zoneid_t zoneid, void *data)
 581 {
 582         struct exacct_globals *acg = data;
 583 
 584         /*
 585          * The accounting files need to be closed during shutdown rather than
 586          * destroy, since otherwise the filesystem they reside on may fail to
 587          * unmount, thus causing the entire zone halt/reboot to fail.
 588          */
 589         exacct_free_info(&acg->ac_proc);
 590         exacct_free_info(&acg->ac_task);
 591         exacct_free_info(&acg->ac_flow);
 592         exacct_free_info(&acg->ac_net);
 593 }
 594 
 595 /* ARGSUSED */
 596 static void
 597 exacct_zone_fini(zoneid_t zoneid, void *data)
 598 {
 599         struct exacct_globals *acg = data;
 600 
 601         mutex_enter(&exacct_globals_list_lock);
 602         list_remove(&exacct_globals_list, acg);
 603         mutex_exit(&exacct_globals_list_lock);
 604 
 605         mutex_destroy(&acg->ac_proc.ac_lock);
 606         mutex_destroy(&acg->ac_task.ac_lock);
 607         mutex_destroy(&acg->ac_flow.ac_lock);
 608         mutex_destroy(&acg->ac_net.ac_lock);
 609         kmem_free(acg, sizeof (*acg));
 610 }
 611 
 612 int
 613 _init()
 614 {
 615         int error;
 616 
 617         mutex_init(&exacct_globals_list_lock, NULL, MUTEX_DEFAULT, NULL);
 618         list_create(&exacct_globals_list, sizeof (struct exacct_globals),
 619             offsetof(struct exacct_globals, ac_link));
 620         zone_key_create(&exacct_zone_key, exacct_zone_init,
 621             exacct_zone_shutdown, exacct_zone_fini);
 622 
 623         if ((error = mod_install(&modlinkage)) != 0) {
 624                 (void) zone_key_delete(exacct_zone_key);
 625                 exacct_zone_key = ZONE_KEY_UNINITIALIZED;
 626                 mutex_destroy(&exacct_globals_list_lock);
 627                 list_destroy(&exacct_globals_list);
 628         }
 629         return (error);
 630 }
 631 
 632 int
 633 _info(struct modinfo *modinfop)
 634 {
 635         return (mod_info(&modlinkage, modinfop));
 636 }
 637 
 638 int
 639 _fini()
 640 {
 641         return (EBUSY);
 642 }