1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 #include <sys/kmem.h>
  26 #include <sys/errno.h>
  27 #include <sys/systm.h>
  28 #include <sys/cmn_err.h>
  29 #include <sys/brand.h>
  30 #include <sys/machbrand.h>
  31 #include <sys/modctl.h>
  32 #include <sys/rwlock.h>
  33 #include <sys/zone.h>
  34 #include <sys/pathname.h>
  35 
/*
 * The only brand interface version this kernel accepts; brand_register()
 * rejects any brand whose b_version differs from it.
 */
#define SUPPORTED_BRAND_VERSION BRAND_VER_1
  37 
  38 #if defined(__sparcv9)
  39 /* sparcv9 uses system wide brand interposition hooks */
  40 static void brand_plat_interposition_enable(void);
  41 static void brand_plat_interposition_disable(void);
  42 
  43 struct brand_mach_ops native_mach_ops  = {
  44                 NULL, NULL
  45 };
  46 #else /* !__sparcv9 */
  47 struct brand_mach_ops native_mach_ops  = {
  48                 NULL, NULL, NULL, NULL
  49 };
  50 #endif /* !__sparcv9 */
  51 
/*
 * Brand descriptor used for native (unbranded) processes.  brand_init()
 * attaches it to p0 and brand_clearbrand() resets a process back to it.
 *
 * NOTE(review): the initializer is positional; the per-field remarks below
 * are inferred from how b_version, b_name and b_ops are used elsewhere in
 * this file -- confirm the order against the brand_t definition.
 */
brand_t native_brand = {
                BRAND_VER_1,            /* presumably b_version */
                "native",               /* presumably b_name */
                NULL,                   /* presumably b_ops (none) */
                &native_mach_ops        /* machine-dependent ops vector */
};
  58 
/*
 * Used to maintain a list of all the brands currently loaded into the
 * kernel.  Entries form a singly linked list headed by 'brand_list'.
 */
struct brand_list {
        int                     bl_refcnt;      /* zones using this brand */
        struct brand_list       *bl_next;       /* next entry, or NULL */
        brand_t                 *bl_brand;      /* the registered brand */
};
  68 
/* Head of the list of registered brands; NULL while no brand is loaded. */
static struct brand_list *brand_list = NULL;

/*
 * This lock protects the integrity of the brand list.  It is held across
 * every traversal and update of the list and of the per-entry reference
 * counts; brand_init() initializes it.
 */
static kmutex_t brand_list_lock;
  75 
  76 void
  77 brand_init()
  78 {
  79         mutex_init(&brand_list_lock, NULL, MUTEX_DEFAULT, NULL);
  80         p0.p_brand = &native_brand;
  81 }
  82 
  83 int
  84 brand_register(brand_t *brand)
  85 {
  86         struct brand_list *list, *scan;
  87 
  88         if (brand == NULL)
  89                 return (EINVAL);
  90 
  91         if (brand->b_version != SUPPORTED_BRAND_VERSION) {
  92                 if (brand->b_version < SUPPORTED_BRAND_VERSION) {
  93                         cmn_err(CE_WARN,
  94                             "brand '%s' was built to run on older versions "
  95                             "of Solaris.",
  96                             brand->b_name);
  97                 } else {
  98                         cmn_err(CE_WARN,
  99                             "brand '%s' was built to run on a newer version "
 100                             "of Solaris.",
 101                             brand->b_name);
 102                 }
 103                 return (EINVAL);
 104         }
 105 
 106         /* Sanity checks */
 107         if (brand->b_name == NULL || brand->b_ops == NULL ||
 108             brand->b_ops->b_brandsys == NULL) {
 109                 cmn_err(CE_WARN, "Malformed brand");
 110                 return (EINVAL);
 111         }
 112 
 113         list = kmem_alloc(sizeof (struct brand_list), KM_SLEEP);
 114 
 115         /* Add the brand to the list of loaded brands. */
 116         mutex_enter(&brand_list_lock);
 117 
 118         /*
 119          * Check to be sure we haven't already registered this brand.
 120          */
 121         for (scan = brand_list; scan != NULL; scan = scan->bl_next) {
 122                 if (strcmp(brand->b_name, scan->bl_brand->b_name) == 0) {
 123                         cmn_err(CE_WARN,
 124                             "Invalid attempt to load a second instance of "
 125                             "brand %s", brand->b_name);
 126                         mutex_exit(&brand_list_lock);
 127                         kmem_free(list, sizeof (struct brand_list));
 128                         return (EINVAL);
 129                 }
 130         }
 131 
 132 #if defined(__sparcv9)
 133         /* sparcv9 uses system wide brand interposition hooks */
 134         if (brand_list == NULL)
 135                 brand_plat_interposition_enable();
 136 #endif /* __sparcv9 */
 137 
 138         list->bl_brand = brand;
 139         list->bl_refcnt = 0;
 140         list->bl_next = brand_list;
 141         brand_list = list;
 142 
 143         mutex_exit(&brand_list_lock);
 144 
 145         return (0);
 146 }
 147 
 148 /*
 149  * The kernel module implementing this brand is being unloaded, so remove
 150  * it from the list of active brands.
 151  */
 152 int
 153 brand_unregister(brand_t *brand)
 154 {
 155         struct brand_list *list, *prev;
 156 
 157         /* Sanity checks */
 158         if (brand == NULL || brand->b_name == NULL) {
 159                 cmn_err(CE_WARN, "Malformed brand");
 160                 return (EINVAL);
 161         }
 162 
 163         prev = NULL;
 164         mutex_enter(&brand_list_lock);
 165 
 166         for (list = brand_list; list != NULL; list = list->bl_next) {
 167                 if (list->bl_brand == brand)
 168                         break;
 169                 prev = list;
 170         }
 171 
 172         if (list == NULL) {
 173                 cmn_err(CE_WARN, "Brand %s wasn't registered", brand->b_name);
 174                 mutex_exit(&brand_list_lock);
 175                 return (EINVAL);
 176         }
 177 
 178         if (list->bl_refcnt > 0) {
 179                 cmn_err(CE_WARN, "Unregistering brand %s which is still in use",
 180                     brand->b_name);
 181                 mutex_exit(&brand_list_lock);
 182                 return (EBUSY);
 183         }
 184 
 185         /* Remove brand from the list */
 186         if (prev != NULL)
 187                 prev->bl_next = list->bl_next;
 188         else
 189                 brand_list = list->bl_next;
 190 
 191 #if defined(__sparcv9)
 192         /* sparcv9 uses system wide brand interposition hooks */
 193         if (brand_list == NULL)
 194                 brand_plat_interposition_disable();
 195 #endif /* __sparcv9 */
 196 
 197         mutex_exit(&brand_list_lock);
 198 
 199         kmem_free(list, sizeof (struct brand_list));
 200 
 201         return (0);
 202 }
 203 
 204 /*
 205  * Record that a zone of this brand has been instantiated.  If the kernel
 206  * module implementing this brand's functionality is not present, this
 207  * routine attempts to load the module as a side effect.
 208  */
 209 brand_t *
 210 brand_register_zone(struct brand_attr *attr)
 211 {
 212         struct brand_list *l = NULL;
 213         ddi_modhandle_t hdl = NULL;
 214         char *modname;
 215         int err = 0;
 216 
 217         if (is_system_labeled()) {
 218                 cmn_err(CE_WARN,
 219                     "Branded zones are not allowed on labeled systems.");
 220                 return (NULL);
 221         }
 222 
 223         /*
 224          * We make at most two passes through this loop.  The first time
 225          * through, we're looking to see if this is a new user of an
 226          * already loaded brand.  If the brand hasn't been loaded, we
 227          * call ddi_modopen() to force it to be loaded and then make a
 228          * second pass through the list of brands.  If we don't find the
 229          * brand the second time through it means that the modname
 230          * specified in the brand_attr structure doesn't provide the brand
 231          * specified in the brandname field.  This would suggest a bug in
 232          * the brand's config.xml file.  We close the module and return
 233          * 'NULL' to the caller.
 234          */
 235         for (;;) {
 236                 /*
 237                  * Search list of loaded brands
 238                  */
 239                 mutex_enter(&brand_list_lock);
 240                 for (l = brand_list; l != NULL; l = l->bl_next)
 241                         if (strcmp(attr->ba_brandname,
 242                             l->bl_brand->b_name) == 0)
 243                                 break;
 244                 if ((l != NULL) || (hdl != NULL))
 245                         break;
 246                 mutex_exit(&brand_list_lock);
 247 
 248                 /*
 249                  * We didn't find that the requested brand has been loaded
 250                  * yet, so we trigger the load of the appropriate kernel
 251                  * module and search the list again.
 252                  */
 253                 modname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 254                 (void) strcpy(modname, "brand/");
 255                 (void) strcat(modname, attr->ba_modname);
 256                 hdl = ddi_modopen(modname, KRTLD_MODE_FIRST, &err);
 257                 kmem_free(modname, MAXPATHLEN);
 258 
 259                 if (err != 0)
 260                         return (NULL);
 261         }
 262 
 263         /*
 264          * If we found the matching brand, bump its reference count.
 265          */
 266         if (l != NULL)
 267                 l->bl_refcnt++;
 268 
 269         mutex_exit(&brand_list_lock);
 270 
 271         if (hdl != NULL)
 272                 (void) ddi_modclose(hdl);
 273 
 274         return ((l != NULL) ? l->bl_brand : NULL);
 275 }
 276 
 277 /*
 278  * Return the number of zones currently using this brand.
 279  */
 280 int
 281 brand_zone_count(struct brand *bp)
 282 {
 283         struct brand_list *l;
 284         int cnt = 0;
 285 
 286         mutex_enter(&brand_list_lock);
 287         for (l = brand_list; l != NULL; l = l->bl_next)
 288                 if (l->bl_brand == bp) {
 289                         cnt = l->bl_refcnt;
 290                         break;
 291                 }
 292         mutex_exit(&brand_list_lock);
 293 
 294         return (cnt);
 295 }
 296 
 297 void
 298 brand_unregister_zone(struct brand *bp)
 299 {
 300         struct brand_list *list;
 301 
 302         mutex_enter(&brand_list_lock);
 303         for (list = brand_list; list != NULL; list = list->bl_next) {
 304                 if (list->bl_brand == bp) {
 305                         ASSERT(list->bl_refcnt > 0);
 306                         list->bl_refcnt--;
 307                         break;
 308                 }
 309         }
 310         mutex_exit(&brand_list_lock);
 311 }
 312 
 313 void
 314 brand_setbrand(proc_t *p)
 315 {
 316         brand_t *bp = p->p_zone->zone_brand;
 317 
 318         ASSERT(bp != NULL);
 319         ASSERT(p->p_brand == &native_brand);
 320 
 321         /*
 322          * We should only be called from exec(), when we know the process
 323          * is single-threaded.
 324          */
 325         ASSERT(p->p_tlist == p->p_tlist->t_forw);
 326 
 327         p->p_brand = bp;
 328         ASSERT(PROC_IS_BRANDED(p));
 329         BROP(p)->b_setbrand(p);
 330 }
 331 
 332 void
 333 brand_clearbrand(proc_t *p, boolean_t no_lwps)
 334 {
 335         brand_t *bp = p->p_zone->zone_brand;
 336         klwp_t *lwp = NULL;
 337         ASSERT(bp != NULL);
 338         ASSERT(!no_lwps || (p->p_tlist == NULL));
 339 
 340         /*
 341          * If called from exec_common() or proc_exit(),
 342          * we know the process is single-threaded.
 343          * If called from fork_fail, p_tlist is NULL.
 344          */
 345         if (!no_lwps) {
 346                 ASSERT(p->p_tlist == p->p_tlist->t_forw);
 347                 lwp = p->p_tlist->t_lwp;
 348         }
 349 
 350         ASSERT(PROC_IS_BRANDED(p));
 351         BROP(p)->b_proc_exit(p, lwp);
 352         p->p_brand = &native_brand;
 353 }
 354 
 355 #if defined(__sparcv9)
 356 /*
 357  * Currently, only sparc has system level brand syscall interposition.
 358  * On x86 we're able to enable syscall interposition on a per-cpu basis
 359  * when a branded thread is scheduled to run on a cpu.
 360  */
 361 
/*
 * Local variables needed for dynamic syscall interposition support.
 * They hold the instructions found at the two patch points before they
 * are overwritten, so that brand_plat_interposition_disable() can put
 * them back.
 */
static uint32_t syscall_trap_patch_instr_orig;
static uint32_t syscall_trap32_patch_instr_orig;

/* Trap Table syscall entry hot patch points */
extern void     syscall_trap_patch_point(void);
extern void     syscall_trap32_patch_point(void);

/* Alternate syscall entry handlers used when branded zones are running */
extern void     syscall_wrapper(void);
extern void     syscall_wrapper32(void);

/*
 * Macros used to facilitate sparcv9 instruction generation.
 *
 * BA_A_INSTR is the "ba,a" instruction word with an all-zero displacement
 * field.  DISP22 takes the byte distance from 'from' to 'to', divides it
 * by four and keeps the low 22 bits; OR-ing the result into BA_A_INSTR
 * yields the branch instruction that is patched in.
 */
#define BA_A_INSTR      0x30800000      /* ba,a addr */
#define DISP22(from, to) \
        ((((uintptr_t)(to) - (uintptr_t)(from)) >> 2) & 0x3fffff)
 378 
 379 /*ARGSUSED*/
 380 static void
 381 brand_plat_interposition_enable(void)
 382 {
 383         ASSERT(MUTEX_HELD(&brand_list_lock));
 384 
 385         /*
 386          * Before we hot patch the kernel save the current instructions
 387          * so that we can restore them later.
 388          */
 389         syscall_trap_patch_instr_orig =
 390             *(uint32_t *)syscall_trap_patch_point;
 391         syscall_trap32_patch_instr_orig =
 392             *(uint32_t *)syscall_trap32_patch_point;
 393 
 394         /*
 395          * Modify the trap table at the patch points.
 396          *
 397          * We basically replace the first instruction at the patch
 398          * point with a ba,a instruction that will transfer control
 399          * to syscall_wrapper or syscall_wrapper32 for 64-bit and
 400          * 32-bit syscalls respectively.  It's important to note that
 401          * the annul bit is set in the branch so we don't execute
 402          * the instruction directly following the one we're patching
 403          * during the branch's delay slot.
 404          *
 405          * It also doesn't matter that we're not atomically updating both
 406          * the 64 and 32 bit syscall paths at the same time since there's
 407          * no actual branded processes running on the system yet.
 408          */
 409         hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
 410             BA_A_INSTR | DISP22(syscall_trap_patch_point, syscall_wrapper),
 411             4);
 412         hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
 413             BA_A_INSTR | DISP22(syscall_trap32_patch_point, syscall_wrapper32),
 414             4);
 415 }
 416 
 417 /*ARGSUSED*/
 418 static void
 419 brand_plat_interposition_disable(void)
 420 {
 421         ASSERT(MUTEX_HELD(&brand_list_lock));
 422 
 423         /*
 424          * Restore the original instructions at the trap table syscall
 425          * patch points to disable the brand syscall interposition
 426          * mechanism.
 427          */
 428         hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
 429             syscall_trap_patch_instr_orig, 4);
 430         hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
 431             syscall_trap32_patch_instr_orig, 4);
 432 }
 433 #endif /* __sparcv9 */
 434 
 435 /*
 436  * The following functions can be shared among kernel brand modules which
 437  * implement Solaris-derived brands, all of which need to do similar tasks
 438  * to manage the brand.
 439  */
 440 
 441 #if defined(_LP64)
 442 static void
 443 Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst)
 444 {
 445         bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident));
 446         dst->e_type =                src->e_type;
 447         dst->e_machine =     src->e_machine;
 448         dst->e_version =     src->e_version;
 449         dst->e_entry =               src->e_entry;
 450         dst->e_phoff =               src->e_phoff;
 451         dst->e_shoff =               src->e_shoff;
 452         dst->e_flags =               src->e_flags;
 453         dst->e_ehsize =              src->e_ehsize;
 454         dst->e_phentsize =   src->e_phentsize;
 455         dst->e_phnum =               src->e_phnum;
 456         dst->e_shentsize =   src->e_shentsize;
 457         dst->e_shnum =               src->e_shnum;
 458         dst->e_shstrndx =    src->e_shstrndx;
 459 }
 460 #endif /* _LP64 */
 461 
 462 /*
 463  * Return -1 if the cmd was not handled by this function.
 464  */
 465 /*ARGSUSED*/
 466 int
 467 brand_solaris_cmd(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
 468     struct brand *pbrand, int brandvers)
 469 {
 470         brand_proc_data_t       *spd;
 471         brand_proc_reg_t        reg;
 472         proc_t                  *p = curproc;
 473         int                     err;
 474 
 475         /*
 476          * There is one operation that is supported for a native
 477          * process; B_EXEC_BRAND.  This brand operaion is redundant
 478          * since the kernel assumes a native process doing an exec
 479          * in a branded zone is going to run a branded processes.
 480          * hence we don't support this operation.
 481          */
 482         if (cmd == B_EXEC_BRAND)
 483                 return (ENOSYS);
 484 
 485         /* For all other operations this must be a branded process. */
 486         if (p->p_brand == &native_brand)
 487                 return (ENOSYS);
 488 
 489         ASSERT(p->p_brand == pbrand);
 490         ASSERT(p->p_brand_data != NULL);
 491 
 492         spd = (brand_proc_data_t *)p->p_brand_data;
 493 
 494         switch ((cmd)) {
 495         case B_EXEC_NATIVE:
 496                 err = exec_common((char *)arg1, (const char **)arg2,
 497                     (const char **)arg3, EBA_NATIVE);
 498                 return (err);
 499 
 500         /*
 501          * Get the address of the user-space system call handler from
 502          * the user process and attach it to the proc structure.
 503          */
 504         case B_REGISTER:
 505                 if (p->p_model == DATAMODEL_NATIVE) {
 506                         if (copyin((void *)arg1, &reg, sizeof (reg)) != 0)
 507                                 return (EFAULT);
 508                 }
 509 #if defined(_LP64)
 510                 else {
 511                         brand_common_reg32_t reg32;
 512 
 513                         if (copyin((void *)arg1, &reg32, sizeof (reg32)) != 0)
 514                                 return (EFAULT);
 515                         reg.sbr_version = reg32.sbr_version;
 516                         reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler;
 517                 }
 518 #endif /* _LP64 */
 519 
 520                 if (reg.sbr_version != brandvers)
 521                         return (ENOTSUP);
 522                 spd->spd_handler = reg.sbr_handler;
 523                 return (0);
 524 
 525         case B_ELFDATA:
 526                 if (p->p_model == DATAMODEL_NATIVE) {
 527                         if (copyout(&spd->spd_elf_data, (void *)arg1,
 528                             sizeof (brand_elf_data_t)) != 0)
 529                                 return (EFAULT);
 530                 }
 531 #if defined(_LP64)
 532                 else {
 533                         brand_elf_data32_t sed32;
 534 
 535                         sed32.sed_phdr = spd->spd_elf_data.sed_phdr;
 536                         sed32.sed_phent = spd->spd_elf_data.sed_phent;
 537                         sed32.sed_phnum = spd->spd_elf_data.sed_phnum;
 538                         sed32.sed_entry = spd->spd_elf_data.sed_entry;
 539                         sed32.sed_base = spd->spd_elf_data.sed_base;
 540                         sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry;
 541                         sed32.sed_lddata = spd->spd_elf_data.sed_lddata;
 542                         if (copyout(&sed32, (void *)arg1, sizeof (sed32))
 543                             != 0)
 544                                 return (EFAULT);
 545                 }
 546 #endif /* _LP64 */
 547                 return (0);
 548 
 549         /*
 550          * The B_TRUSS_POINT subcommand exists so that we can see
 551          * truss output from interposed system calls that return
 552          * without first calling any other system call, meaning they
 553          * would be invisible to truss(1).
 554          * If the second argument is set non-zero, set errno to that
 555          * value as well.
 556          *
 557          * Common arguments seen with truss are:
 558          *
 559          *      arg1: syscall number
 560          *      arg2: errno
 561          */
 562         case B_TRUSS_POINT:
 563                 return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2));
 564         }
 565 
 566         return (-1);
 567 }
 568 
 569 /*ARGSUSED*/
 570 void
 571 brand_solaris_copy_procdata(proc_t *child, proc_t *parent, struct brand *pbrand)
 572 {
 573         brand_proc_data_t       *spd;
 574 
 575         ASSERT(parent->p_brand == pbrand);
 576         ASSERT(child->p_brand == pbrand);
 577         ASSERT(parent->p_brand_data != NULL);
 578         ASSERT(child->p_brand_data == NULL);
 579 
 580         /*
 581          * Just duplicate all the proc data of the parent for the
 582          * child
 583          */
 584         spd = kmem_alloc(sizeof (brand_proc_data_t), KM_SLEEP);
 585         bcopy(parent->p_brand_data, spd, sizeof (brand_proc_data_t));
 586         child->p_brand_data = spd;
 587 }
 588 
 589 static void
 590 restoreexecenv(struct execenv *ep, stack_t *sp)
 591 {
 592         klwp_t *lwp = ttolwp(curthread);
 593 
 594         setexecenv(ep);
 595         lwp->lwp_sigaltstack.ss_sp = sp->ss_sp;
 596         lwp->lwp_sigaltstack.ss_size = sp->ss_size;
 597         lwp->lwp_sigaltstack.ss_flags = sp->ss_flags;
 598 }
 599 
 600 /*ARGSUSED*/
 601 int
 602 brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
 603     intpdata_t *idatap, int level, long *execsz, int setid, caddr_t exec_file,
 604     cred_t *cred, int brand_action, struct brand *pbrand, char *bname,
 605     char *brandlib, char *brandlib32, char *brandlinker, char *brandlinker32)
 606 {
 607 
 608         vnode_t         *nvp;
 609         Ehdr            ehdr;
 610         Addr            uphdr_vaddr;
 611         intptr_t        voffset;
 612         int             interp;
 613         int             i, err;
 614         struct execenv  env;
 615         struct execenv  origenv;
 616         stack_t         orig_sigaltstack;
 617         struct user     *up = PTOU(curproc);
 618         proc_t          *p = ttoproc(curthread);
 619         klwp_t          *lwp = ttolwp(curthread);
 620         brand_proc_data_t       *spd;
 621         brand_elf_data_t sed, *sedp;
 622         char            *linker;
 623         uintptr_t       lddata; /* lddata of executable's linker */
 624 
 625         ASSERT(curproc->p_brand == pbrand);
 626         ASSERT(curproc->p_brand_data != NULL);
 627 
 628         spd = (brand_proc_data_t *)curproc->p_brand_data;
 629         sedp = &spd->spd_elf_data;
 630 
 631         args->brandname = bname;
 632 
 633         /*
 634          * We will exec the brand library and then map in the target
 635          * application and (optionally) the brand's default linker.
 636          */
 637         if (args->to_model == DATAMODEL_NATIVE) {
 638                 args->emulator = brandlib;
 639                 linker = brandlinker;
 640         }
 641 #if defined(_LP64)
 642         else {
 643                 args->emulator = brandlib32;
 644                 linker = brandlinker32;
 645         }
 646 #endif  /* _LP64 */
 647 
 648         if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW,
 649             NULLVPP, &nvp)) != 0) {
 650                 uprintf("%s: not found.", args->emulator);
 651                 return (err);
 652         }
 653 
 654         /*
 655          * The following elf{32}exec call changes the execenv in the proc
 656          * struct which includes changing the p_exec member to be the vnode
 657          * for the brand library (e.g. /.SUNWnative/usr/lib/s10_brand.so.1).
 658          * We will eventually set the p_exec member to be the vnode for the new
 659          * executable when we call setexecenv().  However, if we get an error
 660          * before that call we need to restore the execenv to its original
 661          * values so that when we return to the caller fop_close() works
 662          * properly while cleaning up from the failed exec().  Restoring the
 663          * original value will also properly decrement the 2nd VN_RELE that we
 664          * took on the brand library.
 665          */
 666         origenv.ex_bssbase = p->p_bssbase;
 667         origenv.ex_brkbase = p->p_brkbase;
 668         origenv.ex_brksize = p->p_brksize;
 669         origenv.ex_vp = p->p_exec;
 670         orig_sigaltstack.ss_sp = lwp->lwp_sigaltstack.ss_sp;
 671         orig_sigaltstack.ss_size = lwp->lwp_sigaltstack.ss_size;
 672         orig_sigaltstack.ss_flags = lwp->lwp_sigaltstack.ss_flags;
 673 
 674         if (args->to_model == DATAMODEL_NATIVE) {
 675                 err = elfexec(nvp, uap, args, idatap, INTP_MAXDEPTH + 1, execsz,
 676                     setid, exec_file, cred, brand_action);
 677         }
 678 #if defined(_LP64)
 679         else {
 680                 err = elf32exec(nvp, uap, args, idatap, INTP_MAXDEPTH + 1,
 681                     execsz, setid, exec_file, cred, brand_action);
 682         }
 683 #endif  /* _LP64 */
 684         VN_RELE(nvp);
 685         if (err != 0) {
 686                 restoreexecenv(&origenv, &orig_sigaltstack);
 687                 return (err);
 688         }
 689 
        /*
         * The u_auxv vectors are set up by elfexec to point to the
         * brand emulation library and linker.  Save these so they can
         * be copied to the specific brand aux vectors.
         */
 695         bzero(&sed, sizeof (sed));
 696         for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
 697                 switch (up->u_auxv[i].a_type) {
 698                 case AT_SUN_LDDATA:
 699                         sed.sed_lddata = up->u_auxv[i].a_un.a_val;
 700                         break;
 701                 case AT_BASE:
 702                         sed.sed_base = up->u_auxv[i].a_un.a_val;
 703                         break;
 704                 case AT_ENTRY:
 705                         sed.sed_entry = up->u_auxv[i].a_un.a_val;
 706                         break;
 707                 case AT_PHDR:
 708                         sed.sed_phdr = up->u_auxv[i].a_un.a_val;
 709                         break;
 710                 case AT_PHENT:
 711                         sed.sed_phent = up->u_auxv[i].a_un.a_val;
 712                         break;
 713                 case AT_PHNUM:
 714                         sed.sed_phnum = up->u_auxv[i].a_un.a_val;
 715                         break;
 716                 default:
 717                         break;
 718                 }
 719         }
 720         /* Make sure the emulator has an entry point */
 721         ASSERT(sed.sed_entry != NULL);
 722         ASSERT(sed.sed_phdr != NULL);
 723 
 724         bzero(&env, sizeof (env));
 725         if (args->to_model == DATAMODEL_NATIVE) {
 726                 err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr,
 727                     &voffset, exec_file, &interp, &env.ex_bssbase,
 728                     &env.ex_brkbase, &env.ex_brksize, NULL);
 729         }
 730 #if defined(_LP64)
 731         else {
 732                 Elf32_Ehdr ehdr32;
 733                 Elf32_Addr uphdr_vaddr32;
 734                 err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
 735                     &voffset, exec_file, &interp, &env.ex_bssbase,
 736                     &env.ex_brkbase, &env.ex_brksize, NULL);
 737                 Ehdr32to64(&ehdr32, &ehdr);
 738 
 739                 if (uphdr_vaddr32 == (Elf32_Addr)-1)
 740                         uphdr_vaddr = (Addr)-1;
 741                 else
 742                         uphdr_vaddr = uphdr_vaddr32;
 743         }
 744 #endif  /* _LP64 */
 745         if (err != 0) {
 746                 restoreexecenv(&origenv, &orig_sigaltstack);
 747                 return (err);
 748         }
 749 
 750         /*
 751          * Save off the important properties of the executable. The
 752          * brand library will ask us for this data later, when it is
 753          * initializing and getting ready to transfer control to the
 754          * brand application.
 755          */
 756         if (uphdr_vaddr == (Addr)-1)
 757                 sedp->sed_phdr = voffset + ehdr.e_phoff;
 758         else
 759                 sedp->sed_phdr = voffset + uphdr_vaddr;
 760         sedp->sed_entry = voffset + ehdr.e_entry;
 761         sedp->sed_phent = ehdr.e_phentsize;
 762         sedp->sed_phnum = ehdr.e_phnum;
 763 
 764         if (interp) {
 765                 if (ehdr.e_type == ET_DYN) {
 766                         /*
 767                          * This is a shared object executable, so we
 768                          * need to pick a reasonable place to put the
 769                          * heap. Just don't use the first page.
 770                          */
 771                         env.ex_brkbase = (caddr_t)PAGESIZE;
 772                         env.ex_bssbase = (caddr_t)PAGESIZE;
 773                 }
 774 
 775                 /*
 776                  * If the program needs an interpreter (most do), map
 777                  * it in and store relevant information about it in the
 778                  * aux vector, where the brand library can find it.
 779                  */
 780                 if ((err = lookupname(linker, UIO_SYSSPACE,
 781                     FOLLOW, NULLVPP, &nvp)) != 0) {
 782                         uprintf("%s: not found.", brandlinker);
 783                         restoreexecenv(&origenv, &orig_sigaltstack);
 784                         return (err);
 785                 }
 786                 if (args->to_model == DATAMODEL_NATIVE) {
 787                         err = mapexec_brand(nvp, args, &ehdr,
 788                             &uphdr_vaddr, &voffset, exec_file, &interp,
 789                             NULL, NULL, NULL, &lddata);
 790                 }
 791 #if defined(_LP64)
 792                 else {
 793                         Elf32_Ehdr ehdr32;
 794                         Elf32_Addr uphdr_vaddr32;
 795                         err = mapexec32_brand(nvp, args, &ehdr32,
 796                             &uphdr_vaddr32, &voffset, exec_file, &interp,
 797                             NULL, NULL, NULL, &lddata);
 798                         Ehdr32to64(&ehdr32, &ehdr);
 799 
 800                         if (uphdr_vaddr32 == (Elf32_Addr)-1)
 801                                 uphdr_vaddr = (Addr)-1;
 802                         else
 803                                 uphdr_vaddr = uphdr_vaddr32;
 804                 }
 805 #endif  /* _LP64 */
 806                 VN_RELE(nvp);
 807                 if (err != 0) {
 808                         restoreexecenv(&origenv, &orig_sigaltstack);
 809                         return (err);
 810                 }
 811 
 812                 /*
 813                  * Now that we know the base address of the brand's
 814                  * linker, place it in the aux vector.
 815                  */
 816                 sedp->sed_base = voffset;
 817                 sedp->sed_ldentry = voffset + ehdr.e_entry;
 818                 sedp->sed_lddata = voffset + lddata;
 819         } else {
 820                 /*
 821                  * This program has no interpreter. The brand library
 822                  * will jump to the address in the AT_SUN_BRAND_LDENTRY
 823                  * aux vector, so in this case, put the entry point of
 824                  * the main executable there.
 825                  */
 826                 if (ehdr.e_type == ET_EXEC) {
 827                         /*
 828                          * An executable with no interpreter, this must
 829                          * be a statically linked executable, which
 830                          * means we loaded it at the address specified
 831                          * in the elf header, in which case the e_entry
 832                          * field of the elf header is an absolute
 833                          * address.
 834                          */
 835                         sedp->sed_ldentry = ehdr.e_entry;
 836                         sedp->sed_entry = ehdr.e_entry;
 837                         sedp->sed_lddata = NULL;
 838                         sedp->sed_base = NULL;
 839                 } else {
 840                         /*
 841                          * A shared object with no interpreter, we use
 842                          * the calculated address from above.
 843                          */
 844                         sedp->sed_ldentry = sedp->sed_entry;
 845                         sedp->sed_entry = NULL;
 846                         sedp->sed_phdr = NULL;
 847                         sedp->sed_phent = NULL;
 848                         sedp->sed_phnum = NULL;
 849                         sedp->sed_lddata = NULL;
 850                         sedp->sed_base = voffset;
 851 
 852                         if (ehdr.e_type == ET_DYN) {
 853                                 /*
 854                                  * Delay setting the brkbase until the
 855                                  * first call to brk(); see elfexec()
 856                                  * for details.
 857                                  */
 858                                 env.ex_bssbase = (caddr_t)0;
 859                                 env.ex_brkbase = (caddr_t)0;
 860                                 env.ex_brksize = 0;
 861                         }
 862                 }
 863         }
 864 
 865         env.ex_magic = elfmagic;
 866         env.ex_vp = vp;
 867         setexecenv(&env);
 868 
 869         /*
 870          * It's time to manipulate the process aux vectors.  First
 871          * we need to update the AT_SUN_AUXFLAGS aux vector to set
 872          * the AF_SUN_NOPLM flag.
 873          */
 874         if (args->to_model == DATAMODEL_NATIVE) {
 875                 auxv_t          auxflags_auxv;
 876 
 877                 if (copyin(args->auxp_auxflags, &auxflags_auxv,
 878                     sizeof (auxflags_auxv)) != 0)
 879                         return (EFAULT);
 880 
 881                 ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS);
 882                 auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM;
 883                 if (copyout(&auxflags_auxv, args->auxp_auxflags,
 884                     sizeof (auxflags_auxv)) != 0)
 885                         return (EFAULT);
 886         }
 887 #if defined(_LP64)
 888         else {
 889                 auxv32_t        auxflags_auxv32;
 890 
 891                 if (copyin(args->auxp_auxflags, &auxflags_auxv32,
 892                     sizeof (auxflags_auxv32)) != 0)
 893                         return (EFAULT);
 894 
 895                 ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS);
 896                 auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM;
 897                 if (copyout(&auxflags_auxv32, args->auxp_auxflags,
 898                     sizeof (auxflags_auxv32)) != 0)
 899                         return (EFAULT);
 900         }
 901 #endif  /* _LP64 */
 902 
 903         /* Second, copy out the brand specific aux vectors. */
 904         if (args->to_model == DATAMODEL_NATIVE) {
 905                 auxv_t brand_auxv[] = {
 906                     { AT_SUN_BRAND_AUX1, {0} },
 907                     { AT_SUN_BRAND_AUX2, {0} },
 908                     { AT_SUN_BRAND_AUX3, {0} }
 909                 };
 910 
 911                 ASSERT(brand_auxv[0].a_type ==
 912                     AT_SUN_BRAND_COMMON_LDDATA);
 913                 brand_auxv[0].a_un.a_val = sed.sed_lddata;
 914 
 915                 if (copyout(&brand_auxv, args->auxp_brand,
 916                     sizeof (brand_auxv)) != 0)
 917                         return (EFAULT);
 918         }
 919 #if defined(_LP64)
 920         else {
 921                 auxv32_t brand_auxv32[] = {
 922                     { AT_SUN_BRAND_AUX1, {0} },
 923                     { AT_SUN_BRAND_AUX2, {0} },
 924                     { AT_SUN_BRAND_AUX3, {0} }
 925                 };
 926 
 927                 ASSERT(brand_auxv32[0].a_type == AT_SUN_BRAND_COMMON_LDDATA);
 928                 brand_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata;
 929                 if (copyout(&brand_auxv32, args->auxp_brand,
 930                     sizeof (brand_auxv32)) != 0)
 931                         return (EFAULT);
 932         }
 933 #endif  /* _LP64 */
 934 
        /*
         * Third, the /proc aux vectors set up by elfexec() point to the
         * brand emulation library and its linker.  Copy these to the
         * /proc brand specific aux vector, and update the regular
         * /proc aux vectors to point to the executable (and its
         * linker).  This will enable debuggers to access the
         * executable via the usual /proc or elf notes aux vectors.
         *
         * The brand emulation library's linker will get its aux
         * vectors off the stack, and then update the stack with the
         * executable's aux vectors before jumping to the executable's
         * linker.
         *
         * Debugging the brand emulation library must be done from
         * the global zone, where the librtld_db module knows how to
         * fetch the brand specific aux vectors to access the brand
         * emulation library's linker.
         */
 953         for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
 954                 ulong_t val;
 955 
 956                 switch (up->u_auxv[i].a_type) {
 957                 case AT_SUN_BRAND_COMMON_LDDATA:
 958                         up->u_auxv[i].a_un.a_val = sed.sed_lddata;
 959                         continue;
 960                 case AT_BASE:
 961                         val = sedp->sed_base;
 962                         break;
 963                 case AT_ENTRY:
 964                         val = sedp->sed_entry;
 965                         break;
 966                 case AT_PHDR:
 967                         val = sedp->sed_phdr;
 968                         break;
 969                 case AT_PHENT:
 970                         val = sedp->sed_phent;
 971                         break;
 972                 case AT_PHNUM:
 973                         val = sedp->sed_phnum;
 974                         break;
 975                 case AT_SUN_LDDATA:
 976                         val = sedp->sed_lddata;
 977                         break;
 978                 default:
 979                         continue;
 980                 }
 981 
 982                 up->u_auxv[i].a_un.a_val = val;
 983                 if (val == NULL) {
 984                         /* Hide the entry for static binaries */
 985                         up->u_auxv[i].a_type = AT_IGNORE;
 986                 }
 987         }
 988 
        /*
         * The last thing we do here is clear spd->spd_handler.  This
         * is important because if we're already a branded process and
         * if this exec succeeds, there is a window between when the
         * exec() first returns to the userland of the new process and
         * when our brand library gets initialized, during which we
         * don't want system calls to be redirected to our brand
         * library since it hasn't been initialized yet.
         */
 998         spd->spd_handler = NULL;
 999 
1000         return (0);
1001 }
1002 
1003 void
1004 brand_solaris_exec(struct brand *pbrand)
1005 {
1006         brand_proc_data_t       *spd = curproc->p_brand_data;
1007 
1008         ASSERT(curproc->p_brand == pbrand);
1009         ASSERT(curproc->p_brand_data != NULL);
1010         ASSERT(ttolwp(curthread)->lwp_brand != NULL);
1011 
1012         /*
1013          * We should only be called from exec(), when we know the process
1014          * is single-threaded.
1015          */
1016         ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw);
1017 
1018         /* Upon exec, reset our lwp brand data. */
1019         (void) brand_solaris_freelwp(ttolwp(curthread), pbrand);
1020         (void) brand_solaris_initlwp(ttolwp(curthread), pbrand);
1021 
1022         /*
1023          * Upon exec, reset all the proc brand data, except for the elf
1024          * data associated with the executable we are exec'ing.
1025          */
1026         spd->spd_handler = NULL;
1027 }
1028 
1029 int
1030 brand_solaris_fini(char **emul_table, struct modlinkage *modlinkage,
1031     struct brand *pbrand)
1032 {
1033         int err;
1034 
1035         /*
1036          * If there are any zones using this brand, we can't allow it
1037          * to be unloaded.
1038          */
1039         if (brand_zone_count(pbrand))
1040                 return (EBUSY);
1041 
1042         kmem_free(*emul_table, NSYSCALL);
1043         *emul_table = NULL;
1044 
1045         err = mod_remove(modlinkage);
1046         if (err)
1047                 cmn_err(CE_WARN, "Couldn't unload brand module");
1048 
1049         return (err);
1050 }
1051 
1052 /*ARGSUSED*/
1053 void
1054 brand_solaris_forklwp(klwp_t *p, klwp_t *c, struct brand *pbrand)
1055 {
1056         ASSERT(p->lwp_procp->p_brand == pbrand);
1057         ASSERT(c->lwp_procp->p_brand == pbrand);
1058 
1059         ASSERT(p->lwp_procp->p_brand_data != NULL);
1060         ASSERT(c->lwp_procp->p_brand_data != NULL);
1061 
1062         /*
1063          * Both LWPs have already had been initialized via
1064          * brand_solaris_initlwp().
1065          */
1066         ASSERT(p->lwp_brand != NULL);
1067         ASSERT(c->lwp_brand != NULL);
1068 }
1069 
1070 /*ARGSUSED*/
1071 void
1072 brand_solaris_freelwp(klwp_t *l, struct brand *pbrand)
1073 {
1074         ASSERT(l->lwp_procp->p_brand == pbrand);
1075         ASSERT(l->lwp_procp->p_brand_data != NULL);
1076         ASSERT(l->lwp_brand != NULL);
1077         l->lwp_brand = NULL;
1078 }
1079 
1080 /*ARGSUSED*/
1081 int
1082 brand_solaris_initlwp(klwp_t *l, struct brand *pbrand)
1083 {
1084         ASSERT(l->lwp_procp->p_brand == pbrand);
1085         ASSERT(l->lwp_procp->p_brand_data != NULL);
1086         ASSERT(l->lwp_brand == NULL);
1087         l->lwp_brand = (void *)-1;
1088         return (0);
1089 }
1090 
1091 /*ARGSUSED*/
1092 void
1093 brand_solaris_lwpexit(klwp_t *l, struct brand *pbrand)
1094 {
1095         proc_t  *p = l->lwp_procp;
1096 
1097         ASSERT(l->lwp_procp->p_brand == pbrand);
1098         ASSERT(l->lwp_procp->p_brand_data != NULL);
1099         ASSERT(l->lwp_brand != NULL);
1100 
1101         /*
1102          * We should never be called for the last thread in a process.
1103          * (That case is handled by brand_solaris_proc_exit().)
1104          * Therefore this lwp must be exiting from a multi-threaded
1105          * process.
1106          */
1107         ASSERT(p->p_tlist != p->p_tlist->t_forw);
1108 
1109         l->lwp_brand = NULL;
1110 }
1111 
1112 /*ARGSUSED*/
1113 void
1114 brand_solaris_proc_exit(struct proc *p, klwp_t *l, struct brand *pbrand)
1115 {
1116         ASSERT(p->p_brand == pbrand);
1117         ASSERT(p->p_brand_data != NULL);
1118 
1119         /*
1120          * When called from proc_exit(), we know that process is
1121          * single-threaded and free our lwp brand data.
1122          * otherwise just free p_brand_data and return.
1123          */
1124         if (l != NULL) {
1125                 ASSERT(p->p_tlist == p->p_tlist->t_forw);
1126                 ASSERT(p->p_tlist->t_lwp == l);
1127                 (void) brand_solaris_freelwp(l, pbrand);
1128         }
1129 
1130         /* upon exit, free our proc brand data */
1131         kmem_free(p->p_brand_data, sizeof (brand_proc_data_t));
1132         p->p_brand_data = NULL;
1133 }
1134 
1135 void
1136 brand_solaris_setbrand(proc_t *p, struct brand *pbrand)
1137 {
1138         ASSERT(p->p_brand == pbrand);
1139         ASSERT(p->p_brand_data == NULL);
1140 
1141         /*
1142          * We should only be called from exec(), when we know the process
1143          * is single-threaded.
1144          */
1145         ASSERT(p->p_tlist == p->p_tlist->t_forw);
1146 
1147         p->p_brand_data = kmem_zalloc(sizeof (brand_proc_data_t), KM_SLEEP);
1148         (void) brand_solaris_initlwp(p->p_tlist->t_lwp, pbrand);
1149 }