1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 #include <sys/types.h>
  26 #include <sys/systm.h>
  27 #include <sys/cmn_err.h>
  28 #include <sys/cpuvar.h>
  29 #include <sys/thread.h>
  30 #include <sys/disp.h>
  31 #include <sys/kmem.h>
  32 #include <sys/debug.h>
  33 #include <sys/sysmacros.h>
  34 #include <sys/cpupart.h>
  35 #include <sys/pset.h>
  36 #include <sys/modctl.h>
  37 #include <sys/syscall.h>
  38 #include <sys/task.h>
  39 #include <sys/loadavg.h>
  40 #include <sys/fss.h>
  41 #include <sys/pool.h>
  42 #include <sys/pool_pset.h>
  43 #include <sys/policy.h>
  44 #include <sys/zone.h>
  45 #include <sys/contract/process_impl.h>
  46 
  47 static int      pset(int, long, long, long, long);
  48 
  49 static struct sysent pset_sysent = {
  50         5,
  51         SE_ARGC | SE_NOUNLOAD,
  52         (int (*)())pset,
  53 };
  54 
  55 static struct modlsys modlsys = {
  56         &mod_syscallops, "processor sets", &pset_sysent
  57 };
  58 
  59 #ifdef _SYSCALL32_IMPL
  60 static struct modlsys modlsys32 = {
  61         &mod_syscallops32, "32-bit pset(2) syscall", &pset_sysent
  62 };
  63 #endif
  64 
  65 static struct modlinkage modlinkage = {
  66         MODREV_1,
  67         &modlsys,
  68 #ifdef _SYSCALL32_IMPL
  69         &modlsys32,
  70 #endif
  71         NULL
  72 };
  73 
  74 #define PSET_BADATTR(attr)      ((~PSET_NOESCAPE) & (attr))
  75 
  76 int
  77 _init(void)
  78 {
  79         return (mod_install(&modlinkage));
  80 }
  81 
  82 int
  83 _info(struct modinfo *modinfop)
  84 {
  85         return (mod_info(&modlinkage, modinfop));
  86 }
  87 
  88 static int
  89 pset_create(psetid_t *psetp)
  90 {
  91         psetid_t newpset;
  92         int error;
  93 
  94         if (secpolicy_pset(CRED()) != 0)
  95                 return (set_errno(EPERM));
  96 
  97         pool_lock();
  98         if (pool_state == POOL_ENABLED) {
  99                 pool_unlock();
 100                 return (set_errno(ENOTSUP));
 101         }
 102         error = cpupart_create(&newpset);
 103         if (error) {
 104                 pool_unlock();
 105                 return (set_errno(error));
 106         }
 107         if (copyout(&newpset, psetp, sizeof (psetid_t)) != 0) {
 108                 (void) cpupart_destroy(newpset);
 109                 pool_unlock();
 110                 return (set_errno(EFAULT));
 111         }
 112         pool_unlock();
 113         return (error);
 114 }
 115 
 116 static int
 117 pset_destroy(psetid_t pset)
 118 {
 119         int error;
 120 
 121         if (secpolicy_pset(CRED()) != 0)
 122                 return (set_errno(EPERM));
 123 
 124         pool_lock();
 125         if (pool_state == POOL_ENABLED) {
 126                 pool_unlock();
 127                 return (set_errno(ENOTSUP));
 128         }
 129         error = cpupart_destroy(pset);
 130         pool_unlock();
 131         if (error)
 132                 return (set_errno(error));
 133         else
 134                 return (0);
 135 }
 136 
 137 static int
 138 pset_assign(psetid_t pset, processorid_t cpuid, psetid_t *opset, int forced)
 139 {
 140         psetid_t oldpset;
 141         int     error = 0;
 142         cpu_t   *cp;
 143 
 144         if (pset != PS_QUERY && secpolicy_pset(CRED()) != 0)
 145                 return (set_errno(EPERM));
 146 
 147         pool_lock();
 148         if (pset != PS_QUERY && pool_state == POOL_ENABLED) {
 149                 pool_unlock();
 150                 return (set_errno(ENOTSUP));
 151         }
 152 
 153         mutex_enter(&cpu_lock);
 154         if ((cp = cpu_get(cpuid)) == NULL) {
 155                 mutex_exit(&cpu_lock);
 156                 pool_unlock();
 157                 return (set_errno(EINVAL));
 158         }
 159 
 160         oldpset = cpupart_query_cpu(cp);
 161 
 162         if (pset != PS_QUERY)
 163                 error = cpupart_attach_cpu(pset, cp, forced);
 164         mutex_exit(&cpu_lock);
 165         pool_unlock();
 166 
 167         if (error)
 168                 return (set_errno(error));
 169 
 170         if (opset != NULL)
 171                 if (copyout(&oldpset, opset, sizeof (psetid_t)) != 0)
 172                         return (set_errno(EFAULT));
 173 
 174         return (0);
 175 }
 176 
 177 static int
 178 pset_info(psetid_t pset, int *typep, uint_t *numcpusp,
 179     processorid_t *cpulistp)
 180 {
 181         int pset_type;
 182         uint_t user_ncpus = 0, real_ncpus, copy_ncpus;
 183         processorid_t *pset_cpus = NULL;
 184         int error = 0;
 185 
 186         if (numcpusp != NULL) {
 187                 if (copyin(numcpusp, &user_ncpus, sizeof (uint_t)) != 0)
 188                         return (set_errno(EFAULT));
 189         }
 190 
 191         if (user_ncpus > max_ncpus)  /* sanity check */
 192                 user_ncpus = max_ncpus;
 193         if (user_ncpus != 0 && cpulistp != NULL)
 194                 pset_cpus = kmem_alloc(sizeof (processorid_t) * user_ncpus,
 195                     KM_SLEEP);
 196 
 197         real_ncpus = user_ncpus;
 198         if ((error = cpupart_get_cpus(&pset, pset_cpus, &real_ncpus)) != 0)
 199                 goto out;
 200 
 201         /*
 202          * Now copyout the information about this processor set.
 203          */
 204 
 205         /*
 206          * Get number of cpus to copy back.  If the user didn't pass in
 207          * a big enough buffer, only copy back as many cpus as fits in
 208          * the buffer but copy back the real number of cpus.
 209          */
 210 
 211         if (user_ncpus != 0 && cpulistp != NULL) {
 212                 copy_ncpus = MIN(real_ncpus, user_ncpus);
 213                 if (copyout(pset_cpus, cpulistp,
 214                     sizeof (processorid_t) * copy_ncpus) != 0) {
 215                         error = EFAULT;
 216                         goto out;
 217                 }
 218         }
 219         if (pset_cpus != NULL)
 220                 kmem_free(pset_cpus, sizeof (processorid_t) * user_ncpus);
 221         if (typep != NULL) {
 222                 if (pset == PS_NONE)
 223                         pset_type = PS_NONE;
 224                 else
 225                         pset_type = PS_PRIVATE;
 226                 if (copyout(&pset_type, typep, sizeof (int)) != 0)
 227                         return (set_errno(EFAULT));
 228         }
 229         if (numcpusp != NULL)
 230                 if (copyout(&real_ncpus, numcpusp, sizeof (uint_t)) != 0)
 231                         return (set_errno(EFAULT));
 232         return (0);
 233 
 234 out:
 235         if (pset_cpus != NULL)
 236                 kmem_free(pset_cpus, sizeof (processorid_t) * user_ncpus);
 237         return (set_errno(error));
 238 }
 239 
 240 static int
 241 pset_bind_thread(kthread_t *tp, psetid_t pset, psetid_t *oldpset, void *projbuf,
 242     void *zonebuf)
 243 {
 244         int error = 0;
 245 
 246         ASSERT(pool_lock_held());
 247         ASSERT(MUTEX_HELD(&cpu_lock));
 248         ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
 249 
 250         *oldpset = tp->t_bind_pset;
 251 
 252         switch (pset) {
 253         case PS_SOFT:
 254                 TB_PSET_SOFT_SET(tp);
 255                 break;
 256 
 257         case PS_HARD:
 258                 TB_PSET_HARD_SET(tp);
 259                 break;
 260 
 261         case PS_QUERY:
 262                 break;
 263 
 264         case PS_QUERY_TYPE:
 265                 *oldpset = TB_PSET_IS_SOFT(tp) ? PS_SOFT : PS_HARD;
 266                 break;
 267 
 268         default:
 269                 /*
 270                  * Must have the same UID as the target process or
 271                  * have PRIV_PROC_OWNER privilege.
 272                  */
 273                 if (!hasprocperm(tp->t_cred, CRED()))
 274                         return (EPERM);
 275                 /*
 276                  * Unbinding of an unbound thread should always succeed.
 277                  */
 278                 if (*oldpset == PS_NONE && pset == PS_NONE)
 279                         return (0);
 280                 /*
 281                  * Only privileged processes can move threads from psets with
 282                  * PSET_NOESCAPE attribute.
 283                  */
 284                 if ((tp->t_cpupart->cp_attr & PSET_NOESCAPE) &&
 285                     secpolicy_pbind(CRED()) != 0)
 286                         return (EPERM);
 287                 if ((error = cpupart_bind_thread(tp, pset, 0,
 288                     projbuf, zonebuf)) == 0)
 289                         tp->t_bind_pset = pset;
 290 
 291                 break;
 292         }
 293 
 294         return (error);
 295 }
 296 
 297 static int
 298 pset_bind_process(proc_t *pp, psetid_t pset, psetid_t *oldpset, void *projbuf,
 299     void *zonebuf)
 300 {
 301         int error = 0;
 302         kthread_t *tp;
 303 
 304         /* skip kernel processes */
 305         if ((pset != PS_QUERY) && pp->p_flag & SSYS) {
 306                 *oldpset = PS_NONE;
 307                 return (ENOTSUP);
 308         }
 309 
 310         mutex_enter(&pp->p_lock);
 311         tp = pp->p_tlist;
 312         if (tp != NULL) {
 313                 do {
 314                         int rval;
 315 
 316                         rval = pset_bind_thread(tp, pset, oldpset, projbuf,
 317                             zonebuf);
 318                         if (error == 0)
 319                                 error = rval;
 320                 } while ((tp = tp->t_forw) != pp->p_tlist);
 321         } else
 322                 error = ESRCH;
 323         mutex_exit(&pp->p_lock);
 324 
 325         return (error);
 326 }
 327 
 328 static int
 329 pset_bind_task(task_t *tk, psetid_t pset, psetid_t *oldpset, void *projbuf,
 330     void *zonebuf)
 331 {
 332         int error = 0;
 333         proc_t *pp;
 334 
 335         ASSERT(MUTEX_HELD(&pidlock));
 336 
 337         if ((pp = tk->tk_memb_list) == NULL) {
 338                 return (ESRCH);
 339         }
 340 
 341         do {
 342                 int rval;
 343 
 344                 if (!(pp->p_flag & SSYS)) {
 345                         rval = pset_bind_process(pp, pset, oldpset, projbuf,
 346                             zonebuf);
 347                         if (error == 0)
 348                                 error = rval;
 349                 }
 350         } while ((pp = pp->p_tasknext) != tk->tk_memb_list);
 351 
 352         return (error);
 353 }
 354 
 355 static int
 356 pset_bind_project(kproject_t *kpj, psetid_t pset, psetid_t *oldpset,
 357     void *projbuf, void *zonebuf)
 358 {
 359         int error = 0;
 360         proc_t *pp;
 361 
 362         ASSERT(MUTEX_HELD(&pidlock));
 363 
 364         for (pp = practive; pp != NULL; pp = pp->p_next) {
 365                 if (pp->p_tlist == NULL)
 366                         continue;
 367                 if (pp->p_task->tk_proj == kpj && !(pp->p_flag & SSYS)) {
 368                         int rval;
 369 
 370                         rval = pset_bind_process(pp, pset, oldpset, projbuf,
 371                             zonebuf);
 372                         if (error == 0)
 373                                 error = rval;
 374                 }
 375         }
 376 
 377         return (error);
 378 }
 379 
 380 static int
 381 pset_bind_zone(zone_t *zptr, psetid_t pset, psetid_t *oldpset, void *projbuf,
 382     void *zonebuf)
 383 {
 384         int error = 0;
 385         proc_t *pp;
 386 
 387         ASSERT(MUTEX_HELD(&pidlock));
 388 
 389         for (pp = practive; pp != NULL; pp = pp->p_next) {
 390                 if (pp->p_zone == zptr && !(pp->p_flag & SSYS)) {
 391                         int rval;
 392 
 393                         rval = pset_bind_process(pp, pset, oldpset, projbuf,
 394                             zonebuf);
 395                         if (error == 0)
 396                                 error = rval;
 397                 }
 398         }
 399 
 400         return (error);
 401 }
 402 
 403 /*
 404  * Unbind all threads from the specified processor set, or from all
 405  * processor sets.
 406  */
 407 static int
 408 pset_unbind(psetid_t pset, void *projbuf, void *zonebuf, idtype_t idtype)
 409 {
 410         psetid_t olbind;
 411         kthread_t *tp;
 412         int error = 0;
 413         int rval;
 414         proc_t *pp;
 415 
 416         ASSERT(MUTEX_HELD(&cpu_lock));
 417 
 418         if (idtype == P_PSETID && cpupart_find(pset) == NULL)
 419                 return (EINVAL);
 420 
 421         mutex_enter(&pidlock);
 422         for (pp = practive; pp != NULL; pp = pp->p_next) {
 423                 mutex_enter(&pp->p_lock);
 424                 tp = pp->p_tlist;
 425                 /*
 426                  * Skip zombies and kernel processes, and processes in
 427                  * other zones, if called from a non-global zone.
 428                  */
 429                 if (tp == NULL || (pp->p_flag & SSYS) ||
 430                     !HASZONEACCESS(curproc, pp->p_zone->zone_id)) {
 431                         mutex_exit(&pp->p_lock);
 432                         continue;
 433                 }
 434                 do {
 435                         if ((idtype == P_PSETID && tp->t_bind_pset != pset) ||
 436                             (idtype == P_ALL && tp->t_bind_pset == PS_NONE))
 437                                 continue;
 438                         rval = pset_bind_thread(tp, PS_NONE, &olbind,
 439                             projbuf, zonebuf);
 440                         if (error == 0)
 441                                 error = rval;
 442                 } while ((tp = tp->t_forw) != pp->p_tlist);
 443                 mutex_exit(&pp->p_lock);
 444         }
 445         mutex_exit(&pidlock);
 446         return (error);
 447 }
 448 
 449 static int
 450 pset_bind_contract(cont_process_t *ctp, psetid_t pset, psetid_t *oldpset,
 451     void *projbuf, void *zonebuf)
 452 {
 453         int error = 0;
 454         proc_t *pp;
 455 
 456         ASSERT(MUTEX_HELD(&pidlock));
 457 
 458         for (pp = practive; pp != NULL; pp = pp->p_next) {
 459                 if (pp->p_ct_process == ctp) {
 460                         int rval;
 461 
 462                         rval = pset_bind_process(pp, pset, oldpset, projbuf,
 463                             zonebuf);
 464                         if (error == 0)
 465                                 error = rval;
 466                 }
 467         }
 468 
 469         return (error);
 470 }
 471 
 472 /*
 473  * Bind the lwp:id of process:pid to processor set: pset
 474  */
 475 static int
 476 pset_bind_lwp(psetid_t pset, id_t id, pid_t pid, psetid_t *opset)
 477 {
 478         kthread_t       *tp;
 479         proc_t          *pp;
 480         psetid_t        oldpset;
 481         void            *projbuf, *zonebuf;
 482         int             error = 0;
 483 
 484         pool_lock();
 485         mutex_enter(&cpu_lock);
 486         projbuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_PROJ);
 487         zonebuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_ZONE);
 488 
 489         mutex_enter(&pidlock);
 490         if ((pid == P_MYID && id == P_MYID) ||
 491             (pid == curproc->p_pid && id == P_MYID)) {
 492                 pp = curproc;
 493                 tp = curthread;
 494                 mutex_enter(&pp->p_lock);
 495         } else {
 496                 if (pid == P_MYID) {
 497                         pp = curproc;
 498                 } else if ((pp = prfind(pid)) == NULL) {
 499                         error = ESRCH;
 500                         goto err;
 501                 }
 502                 if (pp != curproc && id == P_MYID) {
 503                         error = EINVAL;
 504                         goto err;
 505                 }
 506                 mutex_enter(&pp->p_lock);
 507                 if ((tp = idtot(pp, id)) == NULL) {
 508                         mutex_exit(&pp->p_lock);
 509                         error = ESRCH;
 510                         goto err;
 511                 }
 512         }
 513 
 514         error = pset_bind_thread(tp, pset, &oldpset, projbuf, zonebuf);
 515         mutex_exit(&pp->p_lock);
 516 err:
 517         mutex_exit(&pidlock);
 518 
 519         fss_freebuf(projbuf, FSS_ALLOC_PROJ);
 520         fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
 521         mutex_exit(&cpu_lock);
 522         pool_unlock();
 523         if (opset != NULL) {
 524                 if (copyout(&oldpset, opset, sizeof (psetid_t)) != 0)
 525                         return (set_errno(EFAULT));
 526         }
 527         if (error != 0)
 528                 return (set_errno(error));
 529         return (0);
 530 }
 531 
 532 static int
 533 pset_bind(psetid_t pset, idtype_t idtype, id_t id, psetid_t *opset)
 534 {
 535         kthread_t       *tp;
 536         proc_t          *pp;
 537         task_t          *tk;
 538         kproject_t      *kpj;
 539         contract_t      *ct;
 540         zone_t          *zptr;
 541         psetid_t        oldpset;
 542         int             error = 0;
 543         void            *projbuf, *zonebuf;
 544 
 545         pool_lock();
 546         if ((pset != PS_QUERY) && (pset != PS_SOFT) &&
 547             (pset != PS_HARD) && (pset != PS_QUERY_TYPE)) {
 548                 /*
 549                  * Check if the set actually exists before checking
 550                  * permissions.  This is the historical error
 551                  * precedence.  Note that if pset was PS_MYID, the
 552                  * cpupart_get_cpus call will change it to the
 553                  * processor set id of the caller (or PS_NONE if the
 554                  * caller is not bound to a processor set).
 555                  */
 556                 if (pool_state == POOL_ENABLED) {
 557                         pool_unlock();
 558                         return (set_errno(ENOTSUP));
 559                 }
 560                 if (cpupart_get_cpus(&pset, NULL, NULL) != 0) {
 561                         pool_unlock();
 562                         return (set_errno(EINVAL));
 563                 } else if (pset != PS_NONE && secpolicy_pbind(CRED()) != 0) {
 564                         pool_unlock();
 565                         return (set_errno(EPERM));
 566                 }
 567         }
 568 
 569         /*
 570          * Pre-allocate enough buffers for FSS for all active projects
 571          * and for all active zones on the system.  Unused buffers will
 572          * be freed later by fss_freebuf().
 573          */
 574         mutex_enter(&cpu_lock);
 575         projbuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_PROJ);
 576         zonebuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_ZONE);
 577 
 578         switch (idtype) {
 579         case P_LWPID:
 580                 pp = curproc;
 581                 mutex_enter(&pidlock);
 582                 mutex_enter(&pp->p_lock);
 583                 if (id == P_MYID) {
 584                         tp = curthread;
 585                 } else {
 586                         if ((tp = idtot(pp, id)) == NULL) {
 587                                 mutex_exit(&pp->p_lock);
 588                                 mutex_exit(&pidlock);
 589                                 error = ESRCH;
 590                                 break;
 591                         }
 592                 }
 593                 error = pset_bind_thread(tp, pset, &oldpset, projbuf, zonebuf);
 594                 mutex_exit(&pp->p_lock);
 595                 mutex_exit(&pidlock);
 596                 break;
 597 
 598         case P_PID:
 599                 mutex_enter(&pidlock);
 600                 if (id == P_MYID) {
 601                         pp = curproc;
 602                 } else if ((pp = prfind(id)) == NULL) {
 603                         mutex_exit(&pidlock);
 604                         error = ESRCH;
 605                         break;
 606                 }
 607                 error = pset_bind_process(pp, pset, &oldpset, projbuf, zonebuf);
 608                 mutex_exit(&pidlock);
 609                 break;
 610 
 611         case P_TASKID:
 612                 mutex_enter(&pidlock);
 613                 if (id == P_MYID)
 614                         id = curproc->p_task->tk_tkid;
 615                 if ((tk = task_hold_by_id(id)) == NULL) {
 616                         mutex_exit(&pidlock);
 617                         error = ESRCH;
 618                         break;
 619                 }
 620                 error = pset_bind_task(tk, pset, &oldpset, projbuf, zonebuf);
 621                 mutex_exit(&pidlock);
 622                 task_rele(tk);
 623                 break;
 624 
 625         case P_PROJID:
 626                 pp = curproc;
 627                 if (id == P_MYID)
 628                         id = curprojid();
 629                 if ((kpj = project_hold_by_id(id, pp->p_zone,
 630                     PROJECT_HOLD_FIND)) == NULL) {
 631                         error = ESRCH;
 632                         break;
 633                 }
 634                 mutex_enter(&pidlock);
 635                 error = pset_bind_project(kpj, pset, &oldpset, projbuf,
 636                     zonebuf);
 637                 mutex_exit(&pidlock);
 638                 project_rele(kpj);
 639                 break;
 640 
 641         case P_ZONEID:
 642                 if (id == P_MYID)
 643                         id = getzoneid();
 644                 if ((zptr = zone_find_by_id(id)) == NULL) {
 645                         error = ESRCH;
 646                         break;
 647                 }
 648                 mutex_enter(&pidlock);
 649                 error = pset_bind_zone(zptr, pset, &oldpset, projbuf, zonebuf);
 650                 mutex_exit(&pidlock);
 651                 zone_rele(zptr);
 652                 break;
 653 
 654         case P_CTID:
 655                 if (id == P_MYID)
 656                         id = PRCTID(curproc);
 657                 if ((ct = contract_type_ptr(process_type, id,
 658                     curproc->p_zone->zone_uniqid)) == NULL) {
 659                         error = ESRCH;
 660                         break;
 661                 }
 662                 mutex_enter(&pidlock);
 663                 error = pset_bind_contract(ct->ct_data, pset, &oldpset, projbuf,
 664                     zonebuf);
 665                 mutex_exit(&pidlock);
 666                 contract_rele(ct);
 667                 break;
 668 
 669         case P_PSETID:
 670                 if (id == P_MYID || pset != PS_NONE || !INGLOBALZONE(curproc)) {
 671                         error = EINVAL;
 672                         break;
 673                 }
 674                 error = pset_unbind(id, projbuf, zonebuf, idtype);
 675                 break;
 676 
 677         case P_ALL:
 678                 if (id == P_MYID || pset != PS_NONE || !INGLOBALZONE(curproc)) {
 679                         error = EINVAL;
 680                         break;
 681                 }
 682                 error = pset_unbind(PS_NONE, projbuf, zonebuf, idtype);
 683                 break;
 684 
 685         default:
 686                 error = EINVAL;
 687                 break;
 688         }
 689 
 690         fss_freebuf(projbuf, FSS_ALLOC_PROJ);
 691         fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
 692         mutex_exit(&cpu_lock);
 693         pool_unlock();
 694 
 695         if (error != 0)
 696                 return (set_errno(error));
 697         if (opset != NULL) {
 698                 if (copyout(&oldpset, opset, sizeof (psetid_t)) != 0)
 699                         return (set_errno(EFAULT));
 700         }
 701         return (0);
 702 }
 703 
 704 /*
 705  * Report load average statistics for the specified processor set.
 706  */
 707 static int
 708 pset_getloadavg(psetid_t pset, int *buf, int nelem)
 709 {
 710         int loadbuf[LOADAVG_NSTATS];
 711         int error = 0;
 712 
 713         if (nelem < 0)
 714                 return (set_errno(EINVAL));
 715 
 716         /*
 717          * We keep the same number of load average statistics for processor
 718          * sets as we do for the system as a whole.
 719          */
 720         if (nelem > LOADAVG_NSTATS)
 721                 nelem = LOADAVG_NSTATS;
 722 
 723         mutex_enter(&cpu_lock);
 724         error = cpupart_get_loadavg(pset, loadbuf, nelem);
 725         mutex_exit(&cpu_lock);
 726         if (!error && nelem && copyout(loadbuf, buf, nelem * sizeof (int)) != 0)
 727                 error = EFAULT;
 728 
 729         if (error)
 730                 return (set_errno(error));
 731         else
 732                 return (0);
 733 }
 734 
 735 
 736 /*
 737  * Return list of active processor sets, up to a maximum indicated by
 738  * numpsets.  The total number of processor sets is stored in the
 739  * location pointed to by numpsets.
 740  */
 741 static int
 742 pset_list(psetid_t *psetlist, uint_t *numpsets)
 743 {
 744         uint_t user_npsets = 0;
 745         uint_t real_npsets;
 746         psetid_t *psets = NULL;
 747         int error = 0;
 748 
 749         if (numpsets != NULL) {
 750                 if (copyin(numpsets, &user_npsets, sizeof (uint_t)) != 0)
 751                         return (set_errno(EFAULT));
 752         }
 753 
 754         /*
 755          * Get the list of all processor sets.  First we need to find
 756          * out how many there are, so we can allocate a large enough
 757          * buffer.
 758          */
 759         mutex_enter(&cpu_lock);
 760         if (!INGLOBALZONE(curproc) && pool_pset_enabled()) {
 761                 psetid_t psetid = zone_pset_get(curproc->p_zone);
 762 
 763                 if (psetid == PS_NONE) {
 764                         real_npsets = 0;
 765                 } else {
 766                         real_npsets = 1;
 767                         psets = kmem_alloc(real_npsets * sizeof (psetid_t),
 768                             KM_SLEEP);
 769                         psets[0] = psetid;
 770                 }
 771         } else {
 772                 real_npsets = cpupart_list(0, NULL, CP_ALL);
 773                 if (real_npsets) {
 774                         psets = kmem_alloc(real_npsets * sizeof (psetid_t),
 775                             KM_SLEEP);
 776                         (void) cpupart_list(psets, real_npsets, CP_ALL);
 777                 }
 778         }
 779         mutex_exit(&cpu_lock);
 780 
 781         if (user_npsets > real_npsets)
 782                 user_npsets = real_npsets;
 783 
 784         if (numpsets != NULL) {
 785                 if (copyout(&real_npsets, numpsets, sizeof (uint_t)) != 0)
 786                         error = EFAULT;
 787                 else if (psetlist != NULL && user_npsets != 0) {
 788                         if (copyout(psets, psetlist,
 789                             user_npsets * sizeof (psetid_t)) != 0)
 790                                 error = EFAULT;
 791                 }
 792         }
 793 
 794         if (real_npsets)
 795                 kmem_free(psets, real_npsets * sizeof (psetid_t));
 796 
 797         if (error)
 798                 return (set_errno(error));
 799         else
 800                 return (0);
 801 }
 802 
 803 static int
 804 pset_setattr(psetid_t pset, uint_t attr)
 805 {
 806         int error;
 807 
 808         if (secpolicy_pset(CRED()) != 0)
 809                 return (set_errno(EPERM));
 810         pool_lock();
 811         if (pool_state == POOL_ENABLED) {
 812                 pool_unlock();
 813                 return (set_errno(ENOTSUP));
 814         }
 815         if (pset == PS_QUERY || PSET_BADATTR(attr)) {
 816                 pool_unlock();
 817                 return (set_errno(EINVAL));
 818         }
 819         if ((error = cpupart_setattr(pset, attr)) != 0) {
 820                 pool_unlock();
 821                 return (set_errno(error));
 822         }
 823         pool_unlock();
 824         return (0);
 825 }
 826 
 827 static int
 828 pset_getattr(psetid_t pset, uint_t *attrp)
 829 {
 830         int error = 0;
 831         uint_t attr;
 832 
 833         if (pset == PS_QUERY)
 834                 return (set_errno(EINVAL));
 835         if ((error = cpupart_getattr(pset, &attr)) != 0)
 836                 return (set_errno(error));
 837         if (copyout(&attr, attrp, sizeof (uint_t)) != 0)
 838                 return (set_errno(EFAULT));
 839         return (0);
 840 }
 841 
 842 static int
 843 pset(int subcode, long arg1, long arg2, long arg3, long arg4)
 844 {
 845         switch (subcode) {
 846         case PSET_CREATE:
 847                 return (pset_create((psetid_t *)arg1));
 848         case PSET_DESTROY:
 849                 return (pset_destroy((psetid_t)arg1));
 850         case PSET_ASSIGN:
 851                 return (pset_assign((psetid_t)arg1,
 852                     (processorid_t)arg2, (psetid_t *)arg3, 0));
 853         case PSET_INFO:
 854                 return (pset_info((psetid_t)arg1, (int *)arg2,
 855                     (uint_t *)arg3, (processorid_t *)arg4));
 856         case PSET_BIND:
 857                 return (pset_bind((psetid_t)arg1, (idtype_t)arg2,
 858                     (id_t)arg3, (psetid_t *)arg4));
 859         case PSET_BIND_LWP:
 860                 return (pset_bind_lwp((psetid_t)arg1, (id_t)arg2,
 861                     (pid_t)arg3, (psetid_t *)arg4));
 862         case PSET_GETLOADAVG:
 863                 return (pset_getloadavg((psetid_t)arg1, (int *)arg2,
 864                     (int)arg3));
 865         case PSET_LIST:
 866                 return (pset_list((psetid_t *)arg1, (uint_t *)arg2));
 867         case PSET_SETATTR:
 868                 return (pset_setattr((psetid_t)arg1, (uint_t)arg2));
 869         case PSET_GETATTR:
 870                 return (pset_getattr((psetid_t)arg1, (uint_t *)arg2));
 871         case PSET_ASSIGN_FORCED:
 872                 return (pset_assign((psetid_t)arg1,
 873                     (processorid_t)arg2, (psetid_t *)arg3, 1));
 874         default:
 875                 return (set_errno(EINVAL));
 876         }
 877 }