1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 #include <sys/types.h>
  26 #include <sys/systm.h>
  27 #include <sys/cmn_err.h>
  28 #include <sys/cpuvar.h>
  29 #include <sys/thread.h>
  30 #include <sys/disp.h>
  31 #include <sys/kmem.h>
  32 #include <sys/debug.h>
  33 #include <sys/sysmacros.h>
  34 #include <sys/cpupart.h>
  35 #include <sys/pset.h>
  36 #include <sys/modctl.h>
  37 #include <sys/syscall.h>
  38 #include <sys/task.h>
  39 #include <sys/loadavg.h>
  40 #include <sys/fss.h>
  41 #include <sys/pool.h>
  42 #include <sys/pool_pset.h>
  43 #include <sys/policy.h>
  44 #include <sys/zone.h>
  45 #include <sys/contract/process_impl.h>
  46 
  47 static int      pset(int, long, long, long, long);
  48 
  49 static struct sysent pset_sysent = {
  50         5,
  51         SE_ARGC | SE_NOUNLOAD,
  52         (int (*)())pset,
  53 };
  54 
  55 static struct modlsys modlsys = {
  56         &mod_syscallops, "processor sets", &pset_sysent
  57 };
  58 
  59 #ifdef _SYSCALL32_IMPL
  60 static struct modlsys modlsys32 = {
  61         &mod_syscallops32, "32-bit pset(2) syscall", &pset_sysent
  62 };
  63 #endif
  64 
  65 static struct modlinkage modlinkage = {
  66         MODREV_1,
  67         {   &modlsys,
  68 #ifdef _SYSCALL32_IMPL
  69             &modlsys32,
  70 #endif
  71             NULL
  72         }
  73 };
  74 
  75 #define PSET_BADATTR(attr)      ((~PSET_NOESCAPE) & (attr))
  76 
  77 int
  78 _init(void)
  79 {
  80         return (mod_install(&modlinkage));
  81 }
  82 
  83 int
  84 _info(struct modinfo *modinfop)
  85 {
  86         return (mod_info(&modlinkage, modinfop));
  87 }
  88 
  89 static int
  90 pset_create(psetid_t *psetp)
  91 {
  92         psetid_t newpset;
  93         int error;
  94 
  95         if (secpolicy_pset(CRED()) != 0)
  96                 return (set_errno(EPERM));
  97 
  98         pool_lock();
  99         if (pool_state == POOL_ENABLED) {
 100                 pool_unlock();
 101                 return (set_errno(ENOTSUP));
 102         }
 103         error = cpupart_create(&newpset);
 104         if (error) {
 105                 pool_unlock();
 106                 return (set_errno(error));
 107         }
 108         if (copyout(&newpset, psetp, sizeof (psetid_t)) != 0) {
 109                 (void) cpupart_destroy(newpset);
 110                 pool_unlock();
 111                 return (set_errno(EFAULT));
 112         }
 113         pool_unlock();
 114         return (error);
 115 }
 116 
 117 static int
 118 pset_destroy(psetid_t pset)
 119 {
 120         int error;
 121 
 122         if (secpolicy_pset(CRED()) != 0)
 123                 return (set_errno(EPERM));
 124 
 125         pool_lock();
 126         if (pool_state == POOL_ENABLED) {
 127                 pool_unlock();
 128                 return (set_errno(ENOTSUP));
 129         }
 130         error = cpupart_destroy(pset);
 131         pool_unlock();
 132         if (error)
 133                 return (set_errno(error));
 134         else
 135                 return (0);
 136 }
 137 
 138 static int
 139 pset_assign(psetid_t pset, processorid_t cpuid, psetid_t *opset, int forced)
 140 {
 141         psetid_t oldpset;
 142         int     error = 0;
 143         cpu_t   *cp;
 144 
 145         if (pset != PS_QUERY && secpolicy_pset(CRED()) != 0)
 146                 return (set_errno(EPERM));
 147 
 148         pool_lock();
 149         if (pset != PS_QUERY && pool_state == POOL_ENABLED) {
 150                 pool_unlock();
 151                 return (set_errno(ENOTSUP));
 152         }
 153 
 154         mutex_enter(&cpu_lock);
 155         if ((cp = cpu_get(cpuid)) == NULL) {
 156                 mutex_exit(&cpu_lock);
 157                 pool_unlock();
 158                 return (set_errno(EINVAL));
 159         }
 160 
 161         oldpset = cpupart_query_cpu(cp);
 162 
 163         if (pset != PS_QUERY)
 164                 error = cpupart_attach_cpu(pset, cp, forced);
 165         mutex_exit(&cpu_lock);
 166         pool_unlock();
 167 
 168         if (error)
 169                 return (set_errno(error));
 170 
 171         if (opset != NULL)
 172                 if (copyout(&oldpset, opset, sizeof (psetid_t)) != 0)
 173                         return (set_errno(EFAULT));
 174 
 175         return (0);
 176 }
 177 
 178 static int
 179 pset_info(psetid_t pset, int *typep, uint_t *numcpusp,
 180     processorid_t *cpulistp)
 181 {
 182         int pset_type;
 183         uint_t user_ncpus = 0, real_ncpus, copy_ncpus;
 184         processorid_t *pset_cpus = NULL;
 185         int error = 0;
 186 
 187         if (numcpusp != NULL) {
 188                 if (copyin(numcpusp, &user_ncpus, sizeof (uint_t)) != 0)
 189                         return (set_errno(EFAULT));
 190         }
 191 
 192         if (user_ncpus > max_ncpus)  /* sanity check */
 193                 user_ncpus = max_ncpus;
 194         if (user_ncpus != 0 && cpulistp != NULL)
 195                 pset_cpus = kmem_alloc(sizeof (processorid_t) * user_ncpus,
 196                     KM_SLEEP);
 197 
 198         real_ncpus = user_ncpus;
 199         if ((error = cpupart_get_cpus(&pset, pset_cpus, &real_ncpus)) != 0)
 200                 goto out;
 201 
 202         /*
 203          * Now copyout the information about this processor set.
 204          */
 205 
 206         /*
 207          * Get number of cpus to copy back.  If the user didn't pass in
 208          * a big enough buffer, only copy back as many cpus as fits in
 209          * the buffer but copy back the real number of cpus.
 210          */
 211 
 212         if (user_ncpus != 0 && cpulistp != NULL) {
 213                 copy_ncpus = MIN(real_ncpus, user_ncpus);
 214                 if (copyout(pset_cpus, cpulistp,
 215                     sizeof (processorid_t) * copy_ncpus) != 0) {
 216                         error = EFAULT;
 217                         goto out;
 218                 }
 219         }
 220         if (pset_cpus != NULL)
 221                 kmem_free(pset_cpus, sizeof (processorid_t) * user_ncpus);
 222         if (typep != NULL) {
 223                 if (pset == PS_NONE)
 224                         pset_type = PS_NONE;
 225                 else
 226                         pset_type = PS_PRIVATE;
 227                 if (copyout(&pset_type, typep, sizeof (int)) != 0)
 228                         return (set_errno(EFAULT));
 229         }
 230         if (numcpusp != NULL)
 231                 if (copyout(&real_ncpus, numcpusp, sizeof (uint_t)) != 0)
 232                         return (set_errno(EFAULT));
 233         return (0);
 234 
 235 out:
 236         if (pset_cpus != NULL)
 237                 kmem_free(pset_cpus, sizeof (processorid_t) * user_ncpus);
 238         return (set_errno(error));
 239 }
 240 
 241 static int
 242 pset_bind_thread(kthread_t *tp, psetid_t pset, psetid_t *oldpset, void *projbuf,
 243     void *zonebuf)
 244 {
 245         int error = 0;
 246 
 247         ASSERT(pool_lock_held());
 248         ASSERT(MUTEX_HELD(&cpu_lock));
 249         ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
 250 
 251         *oldpset = tp->t_bind_pset;
 252 
 253         switch (pset) {
 254         case PS_SOFT:
 255                 TB_PSET_SOFT_SET(tp);
 256                 break;
 257 
 258         case PS_HARD:
 259                 TB_PSET_HARD_SET(tp);
 260                 break;
 261 
 262         case PS_QUERY:
 263                 break;
 264 
 265         case PS_QUERY_TYPE:
 266                 *oldpset = TB_PSET_IS_SOFT(tp) ? PS_SOFT : PS_HARD;
 267                 break;
 268 
 269         default:
 270                 /*
 271                  * Must have the same UID as the target process or
 272                  * have PRIV_PROC_OWNER privilege.
 273                  */
 274                 if (!hasprocperm(tp->t_cred, CRED()))
 275                         return (EPERM);
 276                 /*
 277                  * Unbinding of an unbound thread should always succeed.
 278                  */
 279                 if (*oldpset == PS_NONE && pset == PS_NONE)
 280                         return (0);
 281                 /*
 282                  * Only privileged processes can move threads from psets with
 283                  * PSET_NOESCAPE attribute.
 284                  */
 285                 if ((tp->t_cpupart->cp_attr & PSET_NOESCAPE) &&
 286                     secpolicy_pbind(CRED()) != 0)
 287                         return (EPERM);
 288                 if ((error = cpupart_bind_thread(tp, pset, 0,
 289                     projbuf, zonebuf)) == 0)
 290                         tp->t_bind_pset = pset;
 291 
 292                 break;
 293         }
 294 
 295         return (error);
 296 }
 297 
 298 static int
 299 pset_bind_process(proc_t *pp, psetid_t pset, psetid_t *oldpset, void *projbuf,
 300     void *zonebuf)
 301 {
 302         int error = 0;
 303         kthread_t *tp;
 304 
 305         /* skip kernel processes */
 306         if ((pset != PS_QUERY) && pp->p_flag & SSYS) {
 307                 *oldpset = PS_NONE;
 308                 return (ENOTSUP);
 309         }
 310 
 311         mutex_enter(&pp->p_lock);
 312         tp = pp->p_tlist;
 313         if (tp != NULL) {
 314                 do {
 315                         int rval;
 316 
 317                         rval = pset_bind_thread(tp, pset, oldpset, projbuf,
 318                             zonebuf);
 319                         if (error == 0)
 320                                 error = rval;
 321                 } while ((tp = tp->t_forw) != pp->p_tlist);
 322         } else
 323                 error = ESRCH;
 324         mutex_exit(&pp->p_lock);
 325 
 326         return (error);
 327 }
 328 
 329 static int
 330 pset_bind_task(task_t *tk, psetid_t pset, psetid_t *oldpset, void *projbuf,
 331     void *zonebuf)
 332 {
 333         int error = 0;
 334         proc_t *pp;
 335 
 336         ASSERT(MUTEX_HELD(&pidlock));
 337 
 338         if ((pp = tk->tk_memb_list) == NULL) {
 339                 return (ESRCH);
 340         }
 341 
 342         do {
 343                 int rval;
 344 
 345                 if (!(pp->p_flag & SSYS)) {
 346                         rval = pset_bind_process(pp, pset, oldpset, projbuf,
 347                             zonebuf);
 348                         if (error == 0)
 349                                 error = rval;
 350                 }
 351         } while ((pp = pp->p_tasknext) != tk->tk_memb_list);
 352 
 353         return (error);
 354 }
 355 
 356 static int
 357 pset_bind_project(kproject_t *kpj, psetid_t pset, psetid_t *oldpset,
 358     void *projbuf, void *zonebuf)
 359 {
 360         int error = 0;
 361         proc_t *pp;
 362 
 363         ASSERT(MUTEX_HELD(&pidlock));
 364 
 365         for (pp = practive; pp != NULL; pp = pp->p_next) {
 366                 if (pp->p_tlist == NULL)
 367                         continue;
 368                 if (pp->p_task->tk_proj == kpj && !(pp->p_flag & SSYS)) {
 369                         int rval;
 370 
 371                         rval = pset_bind_process(pp, pset, oldpset, projbuf,
 372                             zonebuf);
 373                         if (error == 0)
 374                                 error = rval;
 375                 }
 376         }
 377 
 378         return (error);
 379 }
 380 
 381 static int
 382 pset_bind_zone(zone_t *zptr, psetid_t pset, psetid_t *oldpset, void *projbuf,
 383     void *zonebuf)
 384 {
 385         int error = 0;
 386         proc_t *pp;
 387 
 388         ASSERT(MUTEX_HELD(&pidlock));
 389 
 390         for (pp = practive; pp != NULL; pp = pp->p_next) {
 391                 if (pp->p_zone == zptr && !(pp->p_flag & SSYS)) {
 392                         int rval;
 393 
 394                         rval = pset_bind_process(pp, pset, oldpset, projbuf,
 395                             zonebuf);
 396                         if (error == 0)
 397                                 error = rval;
 398                 }
 399         }
 400 
 401         return (error);
 402 }
 403 
 404 /*
 405  * Unbind all threads from the specified processor set, or from all
 406  * processor sets.
 407  */
 408 static int
 409 pset_unbind(psetid_t pset, void *projbuf, void *zonebuf, idtype_t idtype)
 410 {
 411         psetid_t olbind;
 412         kthread_t *tp;
 413         int error = 0;
 414         int rval;
 415         proc_t *pp;
 416 
 417         ASSERT(MUTEX_HELD(&cpu_lock));
 418 
 419         if (idtype == P_PSETID && cpupart_find(pset) == NULL)
 420                 return (EINVAL);
 421 
 422         mutex_enter(&pidlock);
 423         for (pp = practive; pp != NULL; pp = pp->p_next) {
 424                 mutex_enter(&pp->p_lock);
 425                 tp = pp->p_tlist;
 426                 /*
 427                  * Skip zombies and kernel processes, and processes in
 428                  * other zones, if called from a non-global zone.
 429                  */
 430                 if (tp == NULL || (pp->p_flag & SSYS) ||
 431                     !HASZONEACCESS(curproc, pp->p_zone->zone_id)) {
 432                         mutex_exit(&pp->p_lock);
 433                         continue;
 434                 }
 435                 do {
 436                         if ((idtype == P_PSETID && tp->t_bind_pset != pset) ||
 437                             (idtype == P_ALL && tp->t_bind_pset == PS_NONE))
 438                                 continue;
 439                         rval = pset_bind_thread(tp, PS_NONE, &olbind,
 440                             projbuf, zonebuf);
 441                         if (error == 0)
 442                                 error = rval;
 443                 } while ((tp = tp->t_forw) != pp->p_tlist);
 444                 mutex_exit(&pp->p_lock);
 445         }
 446         mutex_exit(&pidlock);
 447         return (error);
 448 }
 449 
 450 static int
 451 pset_bind_contract(cont_process_t *ctp, psetid_t pset, psetid_t *oldpset,
 452     void *projbuf, void *zonebuf)
 453 {
 454         int error = 0;
 455         proc_t *pp;
 456 
 457         ASSERT(MUTEX_HELD(&pidlock));
 458 
 459         for (pp = practive; pp != NULL; pp = pp->p_next) {
 460                 if (pp->p_ct_process == ctp) {
 461                         int rval;
 462 
 463                         rval = pset_bind_process(pp, pset, oldpset, projbuf,
 464                             zonebuf);
 465                         if (error == 0)
 466                                 error = rval;
 467                 }
 468         }
 469 
 470         return (error);
 471 }
 472 
 473 /*
 474  * Bind the lwp:id of process:pid to processor set: pset
 475  */
 476 static int
 477 pset_bind_lwp(psetid_t pset, id_t id, pid_t pid, psetid_t *opset)
 478 {
 479         kthread_t       *tp;
 480         proc_t          *pp;
 481         psetid_t        oldpset;
 482         void            *projbuf, *zonebuf;
 483         int             error = 0;
 484 
 485         pool_lock();
 486         mutex_enter(&cpu_lock);
 487         projbuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_PROJ);
 488         zonebuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_ZONE);
 489 
 490         mutex_enter(&pidlock);
 491         if ((pid == P_MYID && id == P_MYID) ||
 492             (pid == curproc->p_pid && id == P_MYID)) {
 493                 pp = curproc;
 494                 tp = curthread;
 495                 mutex_enter(&pp->p_lock);
 496         } else {
 497                 if (pid == P_MYID) {
 498                         pp = curproc;
 499                 } else if ((pp = prfind(pid)) == NULL) {
 500                         error = ESRCH;
 501                         goto err;
 502                 }
 503                 if (pp != curproc && id == P_MYID) {
 504                         error = EINVAL;
 505                         goto err;
 506                 }
 507                 mutex_enter(&pp->p_lock);
 508                 if ((tp = idtot(pp, id)) == NULL) {
 509                         mutex_exit(&pp->p_lock);
 510                         error = ESRCH;
 511                         goto err;
 512                 }
 513         }
 514 
 515         error = pset_bind_thread(tp, pset, &oldpset, projbuf, zonebuf);
 516         mutex_exit(&pp->p_lock);
 517 err:
 518         mutex_exit(&pidlock);
 519 
 520         fss_freebuf(projbuf, FSS_ALLOC_PROJ);
 521         fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
 522         mutex_exit(&cpu_lock);
 523         pool_unlock();
 524         if (opset != NULL) {
 525                 if (copyout(&oldpset, opset, sizeof (psetid_t)) != 0)
 526                         return (set_errno(EFAULT));
 527         }
 528         if (error != 0)
 529                 return (set_errno(error));
 530         return (0);
 531 }
 532 
 533 static int
 534 pset_bind(psetid_t pset, idtype_t idtype, id_t id, psetid_t *opset)
 535 {
 536         kthread_t       *tp;
 537         proc_t          *pp;
 538         task_t          *tk;
 539         kproject_t      *kpj;
 540         contract_t      *ct;
 541         zone_t          *zptr;
 542         psetid_t        oldpset;
 543         int             error = 0;
 544         void            *projbuf, *zonebuf;
 545 
 546         pool_lock();
 547         if ((pset != PS_QUERY) && (pset != PS_SOFT) &&
 548             (pset != PS_HARD) && (pset != PS_QUERY_TYPE)) {
 549                 /*
 550                  * Check if the set actually exists before checking
 551                  * permissions.  This is the historical error
 552                  * precedence.  Note that if pset was PS_MYID, the
 553                  * cpupart_get_cpus call will change it to the
 554                  * processor set id of the caller (or PS_NONE if the
 555                  * caller is not bound to a processor set).
 556                  */
 557                 if (pool_state == POOL_ENABLED) {
 558                         pool_unlock();
 559                         return (set_errno(ENOTSUP));
 560                 }
 561                 if (cpupart_get_cpus(&pset, NULL, NULL) != 0) {
 562                         pool_unlock();
 563                         return (set_errno(EINVAL));
 564                 } else if (pset != PS_NONE && secpolicy_pbind(CRED()) != 0) {
 565                         pool_unlock();
 566                         return (set_errno(EPERM));
 567                 }
 568         }
 569 
 570         /*
 571          * Pre-allocate enough buffers for FSS for all active projects
 572          * and for all active zones on the system.  Unused buffers will
 573          * be freed later by fss_freebuf().
 574          */
 575         mutex_enter(&cpu_lock);
 576         projbuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_PROJ);
 577         zonebuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_ZONE);
 578 
 579         switch (idtype) {
 580         case P_LWPID:
 581                 pp = curproc;
 582                 mutex_enter(&pidlock);
 583                 mutex_enter(&pp->p_lock);
 584                 if (id == P_MYID) {
 585                         tp = curthread;
 586                 } else {
 587                         if ((tp = idtot(pp, id)) == NULL) {
 588                                 mutex_exit(&pp->p_lock);
 589                                 mutex_exit(&pidlock);
 590                                 error = ESRCH;
 591                                 break;
 592                         }
 593                 }
 594                 error = pset_bind_thread(tp, pset, &oldpset, projbuf, zonebuf);
 595                 mutex_exit(&pp->p_lock);
 596                 mutex_exit(&pidlock);
 597                 break;
 598 
 599         case P_PID:
 600                 mutex_enter(&pidlock);
 601                 if (id == P_MYID) {
 602                         pp = curproc;
 603                 } else if ((pp = prfind(id)) == NULL) {
 604                         mutex_exit(&pidlock);
 605                         error = ESRCH;
 606                         break;
 607                 }
 608                 error = pset_bind_process(pp, pset, &oldpset, projbuf, zonebuf);
 609                 mutex_exit(&pidlock);
 610                 break;
 611 
 612         case P_TASKID:
 613                 mutex_enter(&pidlock);
 614                 if (id == P_MYID)
 615                         id = curproc->p_task->tk_tkid;
 616                 if ((tk = task_hold_by_id(id)) == NULL) {
 617                         mutex_exit(&pidlock);
 618                         error = ESRCH;
 619                         break;
 620                 }
 621                 error = pset_bind_task(tk, pset, &oldpset, projbuf, zonebuf);
 622                 mutex_exit(&pidlock);
 623                 task_rele(tk);
 624                 break;
 625 
 626         case P_PROJID:
 627                 pp = curproc;
 628                 if (id == P_MYID)
 629                         id = curprojid();
 630                 if ((kpj = project_hold_by_id(id, pp->p_zone,
 631                     PROJECT_HOLD_FIND)) == NULL) {
 632                         error = ESRCH;
 633                         break;
 634                 }
 635                 mutex_enter(&pidlock);
 636                 error = pset_bind_project(kpj, pset, &oldpset, projbuf,
 637                     zonebuf);
 638                 mutex_exit(&pidlock);
 639                 project_rele(kpj);
 640                 break;
 641 
 642         case P_ZONEID:
 643                 if (id == P_MYID)
 644                         id = getzoneid();
 645                 if ((zptr = zone_find_by_id(id)) == NULL) {
 646                         error = ESRCH;
 647                         break;
 648                 }
 649                 mutex_enter(&pidlock);
 650                 error = pset_bind_zone(zptr, pset, &oldpset, projbuf, zonebuf);
 651                 mutex_exit(&pidlock);
 652                 zone_rele(zptr);
 653                 break;
 654 
 655         case P_CTID:
 656                 if (id == P_MYID)
 657                         id = PRCTID(curproc);
 658                 if ((ct = contract_type_ptr(process_type, id,
 659                     curproc->p_zone->zone_uniqid)) == NULL) {
 660                         error = ESRCH;
 661                         break;
 662                 }
 663                 mutex_enter(&pidlock);
 664                 error = pset_bind_contract(ct->ct_data, pset, &oldpset, projbuf,
 665                     zonebuf);
 666                 mutex_exit(&pidlock);
 667                 contract_rele(ct);
 668                 break;
 669 
 670         case P_PSETID:
 671                 if (id == P_MYID || pset != PS_NONE || !INGLOBALZONE(curproc)) {
 672                         error = EINVAL;
 673                         break;
 674                 }
 675                 error = pset_unbind(id, projbuf, zonebuf, idtype);
 676                 break;
 677 
 678         case P_ALL:
 679                 if (id == P_MYID || pset != PS_NONE || !INGLOBALZONE(curproc)) {
 680                         error = EINVAL;
 681                         break;
 682                 }
 683                 error = pset_unbind(PS_NONE, projbuf, zonebuf, idtype);
 684                 break;
 685 
 686         default:
 687                 error = EINVAL;
 688                 break;
 689         }
 690 
 691         fss_freebuf(projbuf, FSS_ALLOC_PROJ);
 692         fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
 693         mutex_exit(&cpu_lock);
 694         pool_unlock();
 695 
 696         if (error != 0)
 697                 return (set_errno(error));
 698         if (opset != NULL) {
 699                 if (copyout(&oldpset, opset, sizeof (psetid_t)) != 0)
 700                         return (set_errno(EFAULT));
 701         }
 702         return (0);
 703 }
 704 
 705 /*
 706  * Report load average statistics for the specified processor set.
 707  */
 708 static int
 709 pset_getloadavg(psetid_t pset, int *buf, int nelem)
 710 {
 711         int loadbuf[LOADAVG_NSTATS];
 712         int error = 0;
 713 
 714         if (nelem < 0)
 715                 return (set_errno(EINVAL));
 716 
 717         /*
 718          * We keep the same number of load average statistics for processor
 719          * sets as we do for the system as a whole.
 720          */
 721         if (nelem > LOADAVG_NSTATS)
 722                 nelem = LOADAVG_NSTATS;
 723 
 724         mutex_enter(&cpu_lock);
 725         error = cpupart_get_loadavg(pset, loadbuf, nelem);
 726         mutex_exit(&cpu_lock);
 727         if (!error && nelem && copyout(loadbuf, buf, nelem * sizeof (int)) != 0)
 728                 error = EFAULT;
 729 
 730         if (error)
 731                 return (set_errno(error));
 732         else
 733                 return (0);
 734 }
 735 
 736 
 737 /*
 738  * Return list of active processor sets, up to a maximum indicated by
 739  * numpsets.  The total number of processor sets is stored in the
 740  * location pointed to by numpsets.
 741  */
 742 static int
 743 pset_list(psetid_t *psetlist, uint_t *numpsets)
 744 {
 745         uint_t user_npsets = 0;
 746         uint_t real_npsets;
 747         psetid_t *psets = NULL;
 748         int error = 0;
 749 
 750         if (numpsets != NULL) {
 751                 if (copyin(numpsets, &user_npsets, sizeof (uint_t)) != 0)
 752                         return (set_errno(EFAULT));
 753         }
 754 
 755         /*
 756          * Get the list of all processor sets.  First we need to find
 757          * out how many there are, so we can allocate a large enough
 758          * buffer.
 759          */
 760         mutex_enter(&cpu_lock);
 761         if (!INGLOBALZONE(curproc) && pool_pset_enabled()) {
 762                 psetid_t psetid = zone_pset_get(curproc->p_zone);
 763 
 764                 if (psetid == PS_NONE) {
 765                         real_npsets = 0;
 766                 } else {
 767                         real_npsets = 1;
 768                         psets = kmem_alloc(real_npsets * sizeof (psetid_t),
 769                             KM_SLEEP);
 770                         psets[0] = psetid;
 771                 }
 772         } else {
 773                 real_npsets = cpupart_list(0, NULL, CP_ALL);
 774                 if (real_npsets) {
 775                         psets = kmem_alloc(real_npsets * sizeof (psetid_t),
 776                             KM_SLEEP);
 777                         (void) cpupart_list(psets, real_npsets, CP_ALL);
 778                 }
 779         }
 780         mutex_exit(&cpu_lock);
 781 
 782         if (user_npsets > real_npsets)
 783                 user_npsets = real_npsets;
 784 
 785         if (numpsets != NULL) {
 786                 if (copyout(&real_npsets, numpsets, sizeof (uint_t)) != 0)
 787                         error = EFAULT;
 788                 else if (psetlist != NULL && user_npsets != 0) {
 789                         if (copyout(psets, psetlist,
 790                             user_npsets * sizeof (psetid_t)) != 0)
 791                                 error = EFAULT;
 792                 }
 793         }
 794 
 795         if (real_npsets)
 796                 kmem_free(psets, real_npsets * sizeof (psetid_t));
 797 
 798         if (error)
 799                 return (set_errno(error));
 800         else
 801                 return (0);
 802 }
 803 
 804 static int
 805 pset_setattr(psetid_t pset, uint_t attr)
 806 {
 807         int error;
 808 
 809         if (secpolicy_pset(CRED()) != 0)
 810                 return (set_errno(EPERM));
 811         pool_lock();
 812         if (pool_state == POOL_ENABLED) {
 813                 pool_unlock();
 814                 return (set_errno(ENOTSUP));
 815         }
 816         if (pset == PS_QUERY || PSET_BADATTR(attr)) {
 817                 pool_unlock();
 818                 return (set_errno(EINVAL));
 819         }
 820         if ((error = cpupart_setattr(pset, attr)) != 0) {
 821                 pool_unlock();
 822                 return (set_errno(error));
 823         }
 824         pool_unlock();
 825         return (0);
 826 }
 827 
 828 static int
 829 pset_getattr(psetid_t pset, uint_t *attrp)
 830 {
 831         int error = 0;
 832         uint_t attr;
 833 
 834         if (pset == PS_QUERY)
 835                 return (set_errno(EINVAL));
 836         if ((error = cpupart_getattr(pset, &attr)) != 0)
 837                 return (set_errno(error));
 838         if (copyout(&attr, attrp, sizeof (uint_t)) != 0)
 839                 return (set_errno(EFAULT));
 840         return (0);
 841 }
 842 
 843 static int
 844 pset(int subcode, long arg1, long arg2, long arg3, long arg4)
 845 {
 846         switch (subcode) {
 847         case PSET_CREATE:
 848                 return (pset_create((psetid_t *)arg1));
 849         case PSET_DESTROY:
 850                 return (pset_destroy((psetid_t)arg1));
 851         case PSET_ASSIGN:
 852                 return (pset_assign((psetid_t)arg1,
 853                     (processorid_t)arg2, (psetid_t *)arg3, 0));
 854         case PSET_INFO:
 855                 return (pset_info((psetid_t)arg1, (int *)arg2,
 856                     (uint_t *)arg3, (processorid_t *)arg4));
 857         case PSET_BIND:
 858                 return (pset_bind((psetid_t)arg1, (idtype_t)arg2,
 859                     (id_t)arg3, (psetid_t *)arg4));
 860         case PSET_BIND_LWP:
 861                 return (pset_bind_lwp((psetid_t)arg1, (id_t)arg2,
 862                     (pid_t)arg3, (psetid_t *)arg4));
 863         case PSET_GETLOADAVG:
 864                 return (pset_getloadavg((psetid_t)arg1, (int *)arg2,
 865                     (int)arg3));
 866         case PSET_LIST:
 867                 return (pset_list((psetid_t *)arg1, (uint_t *)arg2));
 868         case PSET_SETATTR:
 869                 return (pset_setattr((psetid_t)arg1, (uint_t)arg2));
 870         case PSET_GETATTR:
 871                 return (pset_getattr((psetid_t)arg1, (uint_t *)arg2));
 872         case PSET_ASSIGN_FORCED:
 873                 return (pset_assign((psetid_t)arg1,
 874                     (processorid_t)arg2, (psetid_t *)arg3, 1));
 875         default:
 876                 return (set_errno(EINVAL));
 877         }
 878 }