Print this page
Bring back LX zones.
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/os/brand.c
+++ new/usr/src/uts/common/os/brand.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 #include <sys/kmem.h>
26 26 #include <sys/errno.h>
27 27 #include <sys/systm.h>
28 28 #include <sys/cmn_err.h>
29 29 #include <sys/brand.h>
30 30 #include <sys/machbrand.h>
31 31 #include <sys/modctl.h>
32 32 #include <sys/rwlock.h>
33 33 #include <sys/zone.h>
34 34 #include <sys/pathname.h>
35 35
/* The only brand interface version that brand_register() will accept. */
36 36 #define SUPPORTED_BRAND_VERSION BRAND_VER_1
37 37
↓ open down ↓ |
37 lines elided |
↑ open up ↑ |
38 38 #if defined(__sparcv9)
39 39 /* sparcv9 uses system wide brand interposition hooks */
40 40 static void brand_plat_interposition_enable(void);
41 41 static void brand_plat_interposition_disable(void);
42 42
43 43 struct brand_mach_ops native_mach_ops = {
44 44 NULL, NULL
45 45 };
46 46 #else /* !__sparcv9 */
47 47 struct brand_mach_ops native_mach_ops = {
48 - NULL, NULL, NULL, NULL
48 + NULL, NULL, NULL, NULL, NULL, NULL
49 49 };
50 50 #endif /* !__sparcv9 */
51 51
52 52 brand_t native_brand = {
53 53 BRAND_VER_1,
54 54 "native",
55 55 NULL,
56 56 &native_mach_ops
57 57 };
58 58
59 59 /*
60 60 * Used to maintain a list of all the brands currently loaded into the
61 61 * kernel.
62 62 */
63 63 struct brand_list {
64 64 int bl_refcnt;
65 65 struct brand_list *bl_next;
66 66 brand_t *bl_brand;
67 67 };
68 68
69 69 static struct brand_list *brand_list = NULL;
70 70
71 71 /*
72 72 * This lock protects the integrity of the brand list.
73 73 */
74 74 static kmutex_t brand_list_lock;
75 75
76 76 void
77 77 brand_init()
78 78 {
79 79 mutex_init(&brand_list_lock, NULL, MUTEX_DEFAULT, NULL);
80 80 p0.p_brand = &native_brand;
81 81 }
82 82
83 83 int
84 84 brand_register(brand_t *brand)
85 85 {
86 86 struct brand_list *list, *scan;
87 87
88 88 if (brand == NULL)
89 89 return (EINVAL);
90 90
91 91 if (brand->b_version != SUPPORTED_BRAND_VERSION) {
92 92 if (brand->b_version < SUPPORTED_BRAND_VERSION) {
93 93 cmn_err(CE_WARN,
94 94 "brand '%s' was built to run on older versions "
95 95 "of Solaris.",
96 96 brand->b_name);
97 97 } else {
98 98 cmn_err(CE_WARN,
99 99 "brand '%s' was built to run on a newer version "
100 100 "of Solaris.",
101 101 brand->b_name);
102 102 }
103 103 return (EINVAL);
104 104 }
105 105
106 106 /* Sanity checks */
107 107 if (brand->b_name == NULL || brand->b_ops == NULL ||
108 108 brand->b_ops->b_brandsys == NULL) {
109 109 cmn_err(CE_WARN, "Malformed brand");
110 110 return (EINVAL);
111 111 }
112 112
113 113 list = kmem_alloc(sizeof (struct brand_list), KM_SLEEP);
114 114
115 115 /* Add the brand to the list of loaded brands. */
116 116 mutex_enter(&brand_list_lock);
117 117
118 118 /*
119 119 * Check to be sure we haven't already registered this brand.
120 120 */
121 121 for (scan = brand_list; scan != NULL; scan = scan->bl_next) {
122 122 if (strcmp(brand->b_name, scan->bl_brand->b_name) == 0) {
123 123 cmn_err(CE_WARN,
124 124 "Invalid attempt to load a second instance of "
125 125 "brand %s", brand->b_name);
126 126 mutex_exit(&brand_list_lock);
127 127 kmem_free(list, sizeof (struct brand_list));
128 128 return (EINVAL);
129 129 }
130 130 }
131 131
132 132 #if defined(__sparcv9)
133 133 /* sparcv9 uses system wide brand interposition hooks */
134 134 if (brand_list == NULL)
135 135 brand_plat_interposition_enable();
136 136 #endif /* __sparcv9 */
137 137
138 138 list->bl_brand = brand;
139 139 list->bl_refcnt = 0;
140 140 list->bl_next = brand_list;
141 141 brand_list = list;
142 142
143 143 mutex_exit(&brand_list_lock);
144 144
145 145 return (0);
146 146 }
147 147
148 148 /*
149 149 * The kernel module implementing this brand is being unloaded, so remove
150 150 * it from the list of active brands.
151 151 */
152 152 int
153 153 brand_unregister(brand_t *brand)
154 154 {
155 155 struct brand_list *list, *prev;
156 156
157 157 /* Sanity checks */
158 158 if (brand == NULL || brand->b_name == NULL) {
159 159 cmn_err(CE_WARN, "Malformed brand");
160 160 return (EINVAL);
161 161 }
162 162
163 163 prev = NULL;
164 164 mutex_enter(&brand_list_lock);
165 165
166 166 for (list = brand_list; list != NULL; list = list->bl_next) {
167 167 if (list->bl_brand == brand)
168 168 break;
169 169 prev = list;
170 170 }
171 171
172 172 if (list == NULL) {
173 173 cmn_err(CE_WARN, "Brand %s wasn't registered", brand->b_name);
174 174 mutex_exit(&brand_list_lock);
175 175 return (EINVAL);
176 176 }
177 177
178 178 if (list->bl_refcnt > 0) {
179 179 cmn_err(CE_WARN, "Unregistering brand %s which is still in use",
180 180 brand->b_name);
181 181 mutex_exit(&brand_list_lock);
182 182 return (EBUSY);
183 183 }
184 184
185 185 /* Remove brand from the list */
186 186 if (prev != NULL)
187 187 prev->bl_next = list->bl_next;
188 188 else
189 189 brand_list = list->bl_next;
190 190
191 191 #if defined(__sparcv9)
192 192 /* sparcv9 uses system wide brand interposition hooks */
193 193 if (brand_list == NULL)
194 194 brand_plat_interposition_disable();
195 195 #endif /* __sparcv9 */
196 196
197 197 mutex_exit(&brand_list_lock);
198 198
199 199 kmem_free(list, sizeof (struct brand_list));
200 200
201 201 return (0);
202 202 }
203 203
204 204 /*
205 205 * Record that a zone of this brand has been instantiated. If the kernel
206 206 * module implementing this brand's functionality is not present, this
207 207 * routine attempts to load the module as a side effect.
208 208 */
209 209 brand_t *
210 210 brand_register_zone(struct brand_attr *attr)
211 211 {
212 212 struct brand_list *l = NULL;
213 213 ddi_modhandle_t hdl = NULL;
214 214 char *modname;
215 215 int err = 0;
216 216
217 217 if (is_system_labeled()) {
218 218 cmn_err(CE_WARN,
219 219 "Branded zones are not allowed on labeled systems.");
220 220 return (NULL);
221 221 }
222 222
223 223 /*
224 224 * We make at most two passes through this loop. The first time
225 225 * through, we're looking to see if this is a new user of an
226 226 * already loaded brand. If the brand hasn't been loaded, we
227 227 * call ddi_modopen() to force it to be loaded and then make a
228 228 * second pass through the list of brands. If we don't find the
229 229 * brand the second time through it means that the modname
230 230 * specified in the brand_attr structure doesn't provide the brand
231 231 * specified in the brandname field. This would suggest a bug in
232 232 * the brand's config.xml file. We close the module and return
233 233 * 'NULL' to the caller.
234 234 */
235 235 for (;;) {
236 236 /*
237 237 * Search list of loaded brands
238 238 */
239 239 mutex_enter(&brand_list_lock);
240 240 for (l = brand_list; l != NULL; l = l->bl_next)
241 241 if (strcmp(attr->ba_brandname,
242 242 l->bl_brand->b_name) == 0)
243 243 break;
244 244 if ((l != NULL) || (hdl != NULL))
245 245 break;
246 246 mutex_exit(&brand_list_lock);
247 247
248 248 /*
249 249 * We didn't find that the requested brand has been loaded
250 250 * yet, so we trigger the load of the appropriate kernel
251 251 * module and search the list again.
252 252 */
253 253 modname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
254 254 (void) strcpy(modname, "brand/");
255 255 (void) strcat(modname, attr->ba_modname);
256 256 hdl = ddi_modopen(modname, KRTLD_MODE_FIRST, &err);
257 257 kmem_free(modname, MAXPATHLEN);
258 258
259 259 if (err != 0)
260 260 return (NULL);
261 261 }
262 262
263 263 /*
264 264 * If we found the matching brand, bump its reference count.
265 265 */
266 266 if (l != NULL)
267 267 l->bl_refcnt++;
268 268
269 269 mutex_exit(&brand_list_lock);
270 270
271 271 if (hdl != NULL)
272 272 (void) ddi_modclose(hdl);
273 273
274 274 return ((l != NULL) ? l->bl_brand : NULL);
275 275 }
276 276
277 277 /*
278 278 * Return the number of zones currently using this brand.
279 279 */
280 280 int
281 281 brand_zone_count(struct brand *bp)
282 282 {
283 283 struct brand_list *l;
284 284 int cnt = 0;
285 285
286 286 mutex_enter(&brand_list_lock);
287 287 for (l = brand_list; l != NULL; l = l->bl_next)
288 288 if (l->bl_brand == bp) {
289 289 cnt = l->bl_refcnt;
290 290 break;
291 291 }
292 292 mutex_exit(&brand_list_lock);
293 293
294 294 return (cnt);
295 295 }
296 296
297 297 void
298 298 brand_unregister_zone(struct brand *bp)
299 299 {
300 300 struct brand_list *list;
301 301
302 302 mutex_enter(&brand_list_lock);
303 303 for (list = brand_list; list != NULL; list = list->bl_next) {
304 304 if (list->bl_brand == bp) {
305 305 ASSERT(list->bl_refcnt > 0);
306 306 list->bl_refcnt--;
307 307 break;
308 308 }
309 309 }
310 310 mutex_exit(&brand_list_lock);
311 311 }
312 312
313 313 void
314 314 brand_setbrand(proc_t *p)
315 315 {
316 316 brand_t *bp = p->p_zone->zone_brand;
317 317
318 318 ASSERT(bp != NULL);
319 319 ASSERT(p->p_brand == &native_brand);
320 320
321 321 /*
322 322 * We should only be called from exec(), when we know the process
323 323 * is single-threaded.
324 324 */
325 325 ASSERT(p->p_tlist == p->p_tlist->t_forw);
326 326
327 327 p->p_brand = bp;
328 328 ASSERT(PROC_IS_BRANDED(p));
329 329 BROP(p)->b_setbrand(p);
330 330 }
331 331
332 332 void
333 333 brand_clearbrand(proc_t *p, boolean_t no_lwps)
334 334 {
335 335 brand_t *bp = p->p_zone->zone_brand;
336 336 klwp_t *lwp = NULL;
337 337 ASSERT(bp != NULL);
338 338 ASSERT(!no_lwps || (p->p_tlist == NULL));
339 339
340 340 /*
341 341 * If called from exec_common() or proc_exit(),
342 342 * we know the process is single-threaded.
343 343 * If called from fork_fail, p_tlist is NULL.
344 344 */
345 345 if (!no_lwps) {
346 346 ASSERT(p->p_tlist == p->p_tlist->t_forw);
347 347 lwp = p->p_tlist->t_lwp;
348 348 }
349 349
350 350 ASSERT(PROC_IS_BRANDED(p));
351 351 BROP(p)->b_proc_exit(p, lwp);
352 352 p->p_brand = &native_brand;
353 353 }
354 354
355 355 #if defined(__sparcv9)
356 356 /*
357 357 * Currently, only sparc has system level brand syscall interposition.
358 358 * On x86 we're able to enable syscall interposition on a per-cpu basis
359 359 * when a branded thread is scheduled to run on a cpu.
360 360 */
361 361
/*
 * Helpers for generating sparcv9 instructions.  BA_A_INSTR is the
 * encoding of "ba,a addr" with a zero displacement; DISP22() yields the
 * word displacement from 'from' to 'to', truncated to 22 bits.
 */
#define	BA_A_INSTR	0x30800000	/* ba,a addr */
#define	DISP22(from, to)	\
	((((uintptr_t)(to) - (uintptr_t)(from)) >> 2) & 0x3fffff)

/* Hot patch points for syscall entry in the trap table */
extern void	syscall_trap_patch_point(void);
extern void	syscall_trap32_patch_point(void);

/* Replacement syscall entry handlers used while branded zones run */
extern void	syscall_wrapper(void);
extern void	syscall_wrapper32(void);

/* Saved copies of the instructions overwritten at the patch points */
static uint32_t	syscall_trap_patch_instr_orig;
static uint32_t	syscall_trap32_patch_instr_orig;
378 378
379 379 /*ARGSUSED*/
380 380 static void
381 381 brand_plat_interposition_enable(void)
382 382 {
383 383 ASSERT(MUTEX_HELD(&brand_list_lock));
384 384
385 385 /*
386 386 * Before we hot patch the kernel save the current instructions
387 387 * so that we can restore them later.
388 388 */
389 389 syscall_trap_patch_instr_orig =
390 390 *(uint32_t *)syscall_trap_patch_point;
391 391 syscall_trap32_patch_instr_orig =
392 392 *(uint32_t *)syscall_trap32_patch_point;
393 393
394 394 /*
395 395 * Modify the trap table at the patch points.
396 396 *
397 397 * We basically replace the first instruction at the patch
398 398 * point with a ba,a instruction that will transfer control
399 399 * to syscall_wrapper or syscall_wrapper32 for 64-bit and
400 400 * 32-bit syscalls respectively. It's important to note that
401 401 * the annul bit is set in the branch so we don't execute
402 402 * the instruction directly following the one we're patching
403 403 * during the branch's delay slot.
404 404 *
405 405 * It also doesn't matter that we're not atomically updating both
406 406 * the 64 and 32 bit syscall paths at the same time since there's
407 407 * no actual branded processes running on the system yet.
408 408 */
409 409 hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
410 410 BA_A_INSTR | DISP22(syscall_trap_patch_point, syscall_wrapper),
411 411 4);
412 412 hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
413 413 BA_A_INSTR | DISP22(syscall_trap32_patch_point, syscall_wrapper32),
414 414 4);
415 415 }
416 416
417 417 /*ARGSUSED*/
418 418 static void
419 419 brand_plat_interposition_disable(void)
420 420 {
421 421 ASSERT(MUTEX_HELD(&brand_list_lock));
422 422
423 423 /*
424 424 * Restore the original instructions at the trap table syscall
425 425 * patch points to disable the brand syscall interposition
426 426 * mechanism.
427 427 */
428 428 hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
429 429 syscall_trap_patch_instr_orig, 4);
430 430 hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
431 431 syscall_trap32_patch_instr_orig, 4);
432 432 }
433 433 #endif /* __sparcv9 */
434 434
435 435 /*
436 436 * The following functions can be shared among kernel brand modules which
437 437 * implement Solaris-derived brands, all of which need to do similar tasks
438 438 * to manage the brand.
439 439 */
440 440
441 441 #if defined(_LP64)
442 442 static void
443 443 Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst)
444 444 {
445 445 bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident));
446 446 dst->e_type = src->e_type;
447 447 dst->e_machine = src->e_machine;
448 448 dst->e_version = src->e_version;
449 449 dst->e_entry = src->e_entry;
450 450 dst->e_phoff = src->e_phoff;
451 451 dst->e_shoff = src->e_shoff;
452 452 dst->e_flags = src->e_flags;
453 453 dst->e_ehsize = src->e_ehsize;
454 454 dst->e_phentsize = src->e_phentsize;
455 455 dst->e_phnum = src->e_phnum;
456 456 dst->e_shentsize = src->e_shentsize;
457 457 dst->e_shnum = src->e_shnum;
458 458 dst->e_shstrndx = src->e_shstrndx;
459 459 }
460 460 #endif /* _LP64 */
461 461
462 462 /*
463 463 * Return -1 if the cmd was not handled by this function.
464 464 */
465 465 /*ARGSUSED*/
466 466 int
467 467 brand_solaris_cmd(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
468 468 struct brand *pbrand, int brandvers)
469 469 {
470 470 brand_proc_data_t *spd;
471 471 brand_proc_reg_t reg;
472 472 proc_t *p = curproc;
473 473 int err;
474 474
475 475 /*
476 476 * There is one operation that is supported for a native
477 477 * process; B_EXEC_BRAND. This brand operaion is redundant
478 478 * since the kernel assumes a native process doing an exec
479 479 * in a branded zone is going to run a branded processes.
480 480 * hence we don't support this operation.
481 481 */
482 482 if (cmd == B_EXEC_BRAND)
483 483 return (ENOSYS);
484 484
485 485 /* For all other operations this must be a branded process. */
486 486 if (p->p_brand == &native_brand)
487 487 return (ENOSYS);
488 488
489 489 ASSERT(p->p_brand == pbrand);
490 490 ASSERT(p->p_brand_data != NULL);
491 491
492 492 spd = (brand_proc_data_t *)p->p_brand_data;
493 493
494 494 switch ((cmd)) {
495 495 case B_EXEC_NATIVE:
496 496 err = exec_common((char *)arg1, (const char **)arg2,
497 497 (const char **)arg3, EBA_NATIVE);
498 498 return (err);
499 499
500 500 /*
501 501 * Get the address of the user-space system call handler from
502 502 * the user process and attach it to the proc structure.
503 503 */
504 504 case B_REGISTER:
505 505 if (p->p_model == DATAMODEL_NATIVE) {
506 506 if (copyin((void *)arg1, ®, sizeof (reg)) != 0)
507 507 return (EFAULT);
508 508 }
509 509 #if defined(_LP64)
510 510 else {
511 511 brand_common_reg32_t reg32;
512 512
513 513 if (copyin((void *)arg1, ®32, sizeof (reg32)) != 0)
514 514 return (EFAULT);
515 515 reg.sbr_version = reg32.sbr_version;
516 516 reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler;
517 517 }
518 518 #endif /* _LP64 */
519 519
520 520 if (reg.sbr_version != brandvers)
521 521 return (ENOTSUP);
522 522 spd->spd_handler = reg.sbr_handler;
523 523 return (0);
524 524
525 525 case B_ELFDATA:
526 526 if (p->p_model == DATAMODEL_NATIVE) {
527 527 if (copyout(&spd->spd_elf_data, (void *)arg1,
528 528 sizeof (brand_elf_data_t)) != 0)
529 529 return (EFAULT);
530 530 }
531 531 #if defined(_LP64)
532 532 else {
533 533 brand_elf_data32_t sed32;
534 534
535 535 sed32.sed_phdr = spd->spd_elf_data.sed_phdr;
536 536 sed32.sed_phent = spd->spd_elf_data.sed_phent;
537 537 sed32.sed_phnum = spd->spd_elf_data.sed_phnum;
538 538 sed32.sed_entry = spd->spd_elf_data.sed_entry;
539 539 sed32.sed_base = spd->spd_elf_data.sed_base;
540 540 sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry;
541 541 sed32.sed_lddata = spd->spd_elf_data.sed_lddata;
542 542 if (copyout(&sed32, (void *)arg1, sizeof (sed32))
543 543 != 0)
544 544 return (EFAULT);
545 545 }
546 546 #endif /* _LP64 */
547 547 return (0);
548 548
549 549 /*
550 550 * The B_TRUSS_POINT subcommand exists so that we can see
551 551 * truss output from interposed system calls that return
552 552 * without first calling any other system call, meaning they
553 553 * would be invisible to truss(1).
554 554 * If the second argument is set non-zero, set errno to that
555 555 * value as well.
556 556 *
557 557 * Common arguments seen with truss are:
558 558 *
559 559 * arg1: syscall number
560 560 * arg2: errno
561 561 */
562 562 case B_TRUSS_POINT:
563 563 return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2));
564 564 }
565 565
566 566 return (-1);
567 567 }
568 568
569 569 /*ARGSUSED*/
570 570 void
571 571 brand_solaris_copy_procdata(proc_t *child, proc_t *parent, struct brand *pbrand)
572 572 {
573 573 brand_proc_data_t *spd;
574 574
575 575 ASSERT(parent->p_brand == pbrand);
576 576 ASSERT(child->p_brand == pbrand);
577 577 ASSERT(parent->p_brand_data != NULL);
578 578 ASSERT(child->p_brand_data == NULL);
579 579
580 580 /*
581 581 * Just duplicate all the proc data of the parent for the
582 582 * child
583 583 */
584 584 spd = kmem_alloc(sizeof (brand_proc_data_t), KM_SLEEP);
585 585 bcopy(parent->p_brand_data, spd, sizeof (brand_proc_data_t));
586 586 child->p_brand_data = spd;
587 587 }
588 588
589 589 static void
590 590 restoreexecenv(struct execenv *ep, stack_t *sp)
591 591 {
592 592 klwp_t *lwp = ttolwp(curthread);
593 593
594 594 setexecenv(ep);
595 595 lwp->lwp_sigaltstack.ss_sp = sp->ss_sp;
596 596 lwp->lwp_sigaltstack.ss_size = sp->ss_size;
597 597 lwp->lwp_sigaltstack.ss_flags = sp->ss_flags;
598 598 }
599 599
600 600 /*ARGSUSED*/
601 601 int
602 602 brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
603 603 intpdata_t *idatap, int level, long *execsz, int setid, caddr_t exec_file,
604 604 cred_t *cred, int brand_action, struct brand *pbrand, char *bname,
605 605 char *brandlib, char *brandlib32, char *brandlinker, char *brandlinker32)
606 606 {
607 607
608 608 vnode_t *nvp;
609 609 Ehdr ehdr;
610 610 Addr uphdr_vaddr;
611 611 intptr_t voffset;
612 612 int interp;
613 613 int i, err;
614 614 struct execenv env;
615 615 struct execenv origenv;
616 616 stack_t orig_sigaltstack;
617 617 struct user *up = PTOU(curproc);
618 618 proc_t *p = ttoproc(curthread);
619 619 klwp_t *lwp = ttolwp(curthread);
620 620 brand_proc_data_t *spd;
621 621 brand_elf_data_t sed, *sedp;
622 622 char *linker;
623 623 uintptr_t lddata; /* lddata of executable's linker */
624 624
625 625 ASSERT(curproc->p_brand == pbrand);
626 626 ASSERT(curproc->p_brand_data != NULL);
627 627
628 628 spd = (brand_proc_data_t *)curproc->p_brand_data;
629 629 sedp = &spd->spd_elf_data;
630 630
631 631 args->brandname = bname;
632 632
633 633 /*
634 634 * We will exec the brand library and then map in the target
635 635 * application and (optionally) the brand's default linker.
636 636 */
637 637 if (args->to_model == DATAMODEL_NATIVE) {
638 638 args->emulator = brandlib;
639 639 linker = brandlinker;
640 640 }
641 641 #if defined(_LP64)
642 642 else {
643 643 args->emulator = brandlib32;
644 644 linker = brandlinker32;
645 645 }
646 646 #endif /* _LP64 */
647 647
648 648 if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW,
649 649 NULLVPP, &nvp)) != 0) {
650 650 uprintf("%s: not found.", args->emulator);
651 651 return (err);
652 652 }
653 653
654 654 /*
655 655 * The following elf{32}exec call changes the execenv in the proc
656 656 * struct which includes changing the p_exec member to be the vnode
657 657 * for the brand library (e.g. /.SUNWnative/usr/lib/s10_brand.so.1).
658 658 * We will eventually set the p_exec member to be the vnode for the new
659 659 * executable when we call setexecenv(). However, if we get an error
660 660 * before that call we need to restore the execenv to its original
661 661 * values so that when we return to the caller fop_close() works
662 662 * properly while cleaning up from the failed exec(). Restoring the
663 663 * original value will also properly decrement the 2nd VN_RELE that we
664 664 * took on the brand library.
665 665 */
666 666 origenv.ex_bssbase = p->p_bssbase;
667 667 origenv.ex_brkbase = p->p_brkbase;
668 668 origenv.ex_brksize = p->p_brksize;
669 669 origenv.ex_vp = p->p_exec;
670 670 orig_sigaltstack.ss_sp = lwp->lwp_sigaltstack.ss_sp;
671 671 orig_sigaltstack.ss_size = lwp->lwp_sigaltstack.ss_size;
672 672 orig_sigaltstack.ss_flags = lwp->lwp_sigaltstack.ss_flags;
673 673
674 674 if (args->to_model == DATAMODEL_NATIVE) {
675 675 err = elfexec(nvp, uap, args, idatap, level + 1, execsz,
676 676 setid, exec_file, cred, brand_action);
677 677 }
678 678 #if defined(_LP64)
679 679 else {
680 680 err = elf32exec(nvp, uap, args, idatap, level + 1, execsz,
681 681 setid, exec_file, cred, brand_action);
682 682 }
683 683 #endif /* _LP64 */
684 684 VN_RELE(nvp);
685 685 if (err != 0) {
686 686 restoreexecenv(&origenv, &orig_sigaltstack);
687 687 return (err);
688 688 }
689 689
690 690 /*
691 691 * The u_auxv veCTors are set up by elfexec to point to the
692 692 * brand emulation library and linker. Save these so they can
693 693 * be copied to the specific brand aux vectors.
694 694 */
695 695 bzero(&sed, sizeof (sed));
696 696 for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
697 697 switch (up->u_auxv[i].a_type) {
698 698 case AT_SUN_LDDATA:
699 699 sed.sed_lddata = up->u_auxv[i].a_un.a_val;
700 700 break;
701 701 case AT_BASE:
702 702 sed.sed_base = up->u_auxv[i].a_un.a_val;
703 703 break;
704 704 case AT_ENTRY:
705 705 sed.sed_entry = up->u_auxv[i].a_un.a_val;
706 706 break;
707 707 case AT_PHDR:
708 708 sed.sed_phdr = up->u_auxv[i].a_un.a_val;
709 709 break;
710 710 case AT_PHENT:
711 711 sed.sed_phent = up->u_auxv[i].a_un.a_val;
712 712 break;
713 713 case AT_PHNUM:
714 714 sed.sed_phnum = up->u_auxv[i].a_un.a_val;
715 715 break;
716 716 default:
717 717 break;
718 718 }
719 719 }
720 720 /* Make sure the emulator has an entry point */
721 721 ASSERT(sed.sed_entry != NULL);
722 722 ASSERT(sed.sed_phdr != NULL);
723 723
724 724 bzero(&env, sizeof (env));
725 725 if (args->to_model == DATAMODEL_NATIVE) {
726 726 err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr,
727 727 &voffset, exec_file, &interp, &env.ex_bssbase,
728 728 &env.ex_brkbase, &env.ex_brksize, NULL);
729 729 }
730 730 #if defined(_LP64)
731 731 else {
732 732 Elf32_Ehdr ehdr32;
733 733 Elf32_Addr uphdr_vaddr32;
734 734 err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
735 735 &voffset, exec_file, &interp, &env.ex_bssbase,
736 736 &env.ex_brkbase, &env.ex_brksize, NULL);
737 737 Ehdr32to64(&ehdr32, &ehdr);
738 738
739 739 if (uphdr_vaddr32 == (Elf32_Addr)-1)
740 740 uphdr_vaddr = (Addr)-1;
741 741 else
742 742 uphdr_vaddr = uphdr_vaddr32;
743 743 }
744 744 #endif /* _LP64 */
745 745 if (err != 0) {
746 746 restoreexecenv(&origenv, &orig_sigaltstack);
747 747 return (err);
748 748 }
749 749
750 750 /*
751 751 * Save off the important properties of the executable. The
752 752 * brand library will ask us for this data later, when it is
753 753 * initializing and getting ready to transfer control to the
754 754 * brand application.
755 755 */
756 756 if (uphdr_vaddr == (Addr)-1)
757 757 sedp->sed_phdr = voffset + ehdr.e_phoff;
758 758 else
759 759 sedp->sed_phdr = voffset + uphdr_vaddr;
760 760 sedp->sed_entry = voffset + ehdr.e_entry;
761 761 sedp->sed_phent = ehdr.e_phentsize;
762 762 sedp->sed_phnum = ehdr.e_phnum;
763 763
764 764 if (interp) {
765 765 if (ehdr.e_type == ET_DYN) {
766 766 /*
767 767 * This is a shared object executable, so we
768 768 * need to pick a reasonable place to put the
769 769 * heap. Just don't use the first page.
770 770 */
771 771 env.ex_brkbase = (caddr_t)PAGESIZE;
772 772 env.ex_bssbase = (caddr_t)PAGESIZE;
773 773 }
774 774
775 775 /*
776 776 * If the program needs an interpreter (most do), map
777 777 * it in and store relevant information about it in the
778 778 * aux vector, where the brand library can find it.
779 779 */
780 780 if ((err = lookupname(linker, UIO_SYSSPACE,
781 781 FOLLOW, NULLVPP, &nvp)) != 0) {
782 782 uprintf("%s: not found.", brandlinker);
783 783 restoreexecenv(&origenv, &orig_sigaltstack);
784 784 return (err);
785 785 }
786 786 if (args->to_model == DATAMODEL_NATIVE) {
787 787 err = mapexec_brand(nvp, args, &ehdr,
788 788 &uphdr_vaddr, &voffset, exec_file, &interp,
789 789 NULL, NULL, NULL, &lddata);
790 790 }
791 791 #if defined(_LP64)
792 792 else {
793 793 Elf32_Ehdr ehdr32;
794 794 Elf32_Addr uphdr_vaddr32;
795 795 err = mapexec32_brand(nvp, args, &ehdr32,
796 796 &uphdr_vaddr32, &voffset, exec_file, &interp,
797 797 NULL, NULL, NULL, &lddata);
798 798 Ehdr32to64(&ehdr32, &ehdr);
799 799
800 800 if (uphdr_vaddr32 == (Elf32_Addr)-1)
801 801 uphdr_vaddr = (Addr)-1;
802 802 else
803 803 uphdr_vaddr = uphdr_vaddr32;
804 804 }
805 805 #endif /* _LP64 */
806 806 VN_RELE(nvp);
807 807 if (err != 0) {
808 808 restoreexecenv(&origenv, &orig_sigaltstack);
809 809 return (err);
810 810 }
811 811
812 812 /*
813 813 * Now that we know the base address of the brand's
814 814 * linker, place it in the aux vector.
815 815 */
816 816 sedp->sed_base = voffset;
817 817 sedp->sed_ldentry = voffset + ehdr.e_entry;
818 818 sedp->sed_lddata = voffset + lddata;
819 819 } else {
820 820 /*
821 821 * This program has no interpreter. The brand library
822 822 * will jump to the address in the AT_SUN_BRAND_LDENTRY
823 823 * aux vector, so in this case, put the entry point of
824 824 * the main executable there.
825 825 */
826 826 if (ehdr.e_type == ET_EXEC) {
827 827 /*
828 828 * An executable with no interpreter, this must
829 829 * be a statically linked executable, which
830 830 * means we loaded it at the address specified
831 831 * in the elf header, in which case the e_entry
832 832 * field of the elf header is an absolute
833 833 * address.
834 834 */
835 835 sedp->sed_ldentry = ehdr.e_entry;
836 836 sedp->sed_entry = ehdr.e_entry;
837 837 sedp->sed_lddata = NULL;
838 838 sedp->sed_base = NULL;
839 839 } else {
840 840 /*
841 841 * A shared object with no interpreter, we use
842 842 * the calculated address from above.
843 843 */
844 844 sedp->sed_ldentry = sedp->sed_entry;
845 845 sedp->sed_entry = NULL;
846 846 sedp->sed_phdr = NULL;
847 847 sedp->sed_phent = NULL;
848 848 sedp->sed_phnum = NULL;
849 849 sedp->sed_lddata = NULL;
850 850 sedp->sed_base = voffset;
851 851
852 852 if (ehdr.e_type == ET_DYN) {
853 853 /*
854 854 * Delay setting the brkbase until the
855 855 * first call to brk(); see elfexec()
856 856 * for details.
857 857 */
858 858 env.ex_bssbase = (caddr_t)0;
859 859 env.ex_brkbase = (caddr_t)0;
860 860 env.ex_brksize = 0;
861 861 }
862 862 }
863 863 }
864 864
865 865 env.ex_magic = elfmagic;
866 866 env.ex_vp = vp;
867 867 setexecenv(&env);
868 868
869 869 /*
870 870 * It's time to manipulate the process aux vectors. First
871 871 * we need to update the AT_SUN_AUXFLAGS aux vector to set
872 872 * the AF_SUN_NOPLM flag.
873 873 */
874 874 if (args->to_model == DATAMODEL_NATIVE) {
875 875 auxv_t auxflags_auxv;
876 876
877 877 if (copyin(args->auxp_auxflags, &auxflags_auxv,
878 878 sizeof (auxflags_auxv)) != 0)
879 879 return (EFAULT);
880 880
881 881 ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS);
882 882 auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM;
883 883 if (copyout(&auxflags_auxv, args->auxp_auxflags,
884 884 sizeof (auxflags_auxv)) != 0)
885 885 return (EFAULT);
886 886 }
887 887 #if defined(_LP64)
888 888 else {
889 889 auxv32_t auxflags_auxv32;
890 890
891 891 if (copyin(args->auxp_auxflags, &auxflags_auxv32,
892 892 sizeof (auxflags_auxv32)) != 0)
893 893 return (EFAULT);
894 894
895 895 ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS);
896 896 auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM;
897 897 if (copyout(&auxflags_auxv32, args->auxp_auxflags,
898 898 sizeof (auxflags_auxv32)) != 0)
899 899 return (EFAULT);
900 900 }
901 901 #endif /* _LP64 */
902 902
903 903 /* Second, copy out the brand specific aux vectors. */
904 904 if (args->to_model == DATAMODEL_NATIVE) {
905 905 auxv_t brand_auxv[] = {
906 906 { AT_SUN_BRAND_AUX1, 0 },
907 907 { AT_SUN_BRAND_AUX2, 0 },
908 908 { AT_SUN_BRAND_AUX3, 0 }
909 909 };
910 910
911 911 ASSERT(brand_auxv[0].a_type ==
912 912 AT_SUN_BRAND_COMMON_LDDATA);
913 913 brand_auxv[0].a_un.a_val = sed.sed_lddata;
914 914
915 915 if (copyout(&brand_auxv, args->auxp_brand,
916 916 sizeof (brand_auxv)) != 0)
917 917 return (EFAULT);
918 918 }
919 919 #if defined(_LP64)
920 920 else {
921 921 auxv32_t brand_auxv32[] = {
922 922 { AT_SUN_BRAND_AUX1, 0 },
923 923 { AT_SUN_BRAND_AUX2, 0 },
924 924 { AT_SUN_BRAND_AUX3, 0 }
925 925 };
926 926
927 927 ASSERT(brand_auxv32[0].a_type == AT_SUN_BRAND_COMMON_LDDATA);
928 928 brand_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata;
929 929 if (copyout(&brand_auxv32, args->auxp_brand,
930 930 sizeof (brand_auxv32)) != 0)
931 931 return (EFAULT);
932 932 }
933 933 #endif /* _LP64 */
934 934
935 935 /*
936 936 * Third, the /proc aux vectors set up by elfexec() point to
937 937 * brand emulation library and it's linker. Copy these to the
938 938 * /proc brand specific aux vector, and update the regular
939 939 * /proc aux vectors to point to the executable (and it's
940 940 * linker). This will enable debuggers to access the
941 941 * executable via the usual /proc or elf notes aux vectors.
942 942 *
943 943 * The brand emulation library's linker will get it's aux
944 944 * vectors off the stack, and then update the stack with the
945 945 * executable's aux vectors before jumping to the executable's
946 946 * linker.
947 947 *
948 948 * Debugging the brand emulation library must be done from
949 949 * the global zone, where the librtld_db module knows how to
950 950 * fetch the brand specific aux vectors to access the brand
951 951 * emulation libraries linker.
952 952 */
953 953 for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
954 954 ulong_t val;
955 955
956 956 switch (up->u_auxv[i].a_type) {
957 957 case AT_SUN_BRAND_COMMON_LDDATA:
958 958 up->u_auxv[i].a_un.a_val = sed.sed_lddata;
959 959 continue;
960 960 case AT_BASE:
961 961 val = sedp->sed_base;
962 962 break;
963 963 case AT_ENTRY:
964 964 val = sedp->sed_entry;
965 965 break;
966 966 case AT_PHDR:
967 967 val = sedp->sed_phdr;
968 968 break;
969 969 case AT_PHENT:
970 970 val = sedp->sed_phent;
971 971 break;
972 972 case AT_PHNUM:
973 973 val = sedp->sed_phnum;
974 974 break;
975 975 case AT_SUN_LDDATA:
976 976 val = sedp->sed_lddata;
977 977 break;
978 978 default:
979 979 continue;
980 980 }
981 981
982 982 up->u_auxv[i].a_un.a_val = val;
983 983 if (val == NULL) {
984 984 /* Hide the entry for static binaries */
985 985 up->u_auxv[i].a_type = AT_IGNORE;
986 986 }
987 987 }
988 988
989 989 /*
990 990 * The last thing we do here is clear spd->spd_handler. This
991 991 * is important because if we're already a branded process and
992 992 * if this exec succeeds, there is a window between when the
993 993 * exec() first returns to the userland of the new process and
994 994 * when our brand library get's initialized, during which we
995 995 * don't want system calls to be re-directed to our brand
996 996 * library since it hasn't been initialized yet.
997 997 */
998 998 spd->spd_handler = NULL;
999 999
1000 1000 return (0);
1001 1001 }
1002 1002
1003 1003 void
1004 1004 brand_solaris_exec(struct brand *pbrand)
1005 1005 {
1006 1006 brand_proc_data_t *spd = curproc->p_brand_data;
1007 1007
1008 1008 ASSERT(curproc->p_brand == pbrand);
1009 1009 ASSERT(curproc->p_brand_data != NULL);
1010 1010 ASSERT(ttolwp(curthread)->lwp_brand != NULL);
1011 1011
1012 1012 /*
1013 1013 * We should only be called from exec(), when we know the process
1014 1014 * is single-threaded.
1015 1015 */
1016 1016 ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw);
1017 1017
1018 1018 /* Upon exec, reset our lwp brand data. */
1019 1019 (void) brand_solaris_freelwp(ttolwp(curthread), pbrand);
1020 1020 (void) brand_solaris_initlwp(ttolwp(curthread), pbrand);
1021 1021
1022 1022 /*
1023 1023 * Upon exec, reset all the proc brand data, except for the elf
1024 1024 * data associated with the executable we are exec'ing.
1025 1025 */
1026 1026 spd->spd_handler = NULL;
1027 1027 }
1028 1028
1029 1029 int
1030 1030 brand_solaris_fini(char **emul_table, struct modlinkage *modlinkage,
1031 1031 struct brand *pbrand)
1032 1032 {
1033 1033 int err;
1034 1034
1035 1035 /*
1036 1036 * If there are any zones using this brand, we can't allow it
1037 1037 * to be unloaded.
1038 1038 */
1039 1039 if (brand_zone_count(pbrand))
1040 1040 return (EBUSY);
1041 1041
1042 1042 kmem_free(*emul_table, NSYSCALL);
1043 1043 *emul_table = NULL;
1044 1044
1045 1045 err = mod_remove(modlinkage);
1046 1046 if (err)
1047 1047 cmn_err(CE_WARN, "Couldn't unload brand module");
1048 1048
1049 1049 return (err);
1050 1050 }
1051 1051
1052 1052 /*ARGSUSED*/
1053 1053 void
1054 1054 brand_solaris_forklwp(klwp_t *p, klwp_t *c, struct brand *pbrand)
1055 1055 {
1056 1056 ASSERT(p->lwp_procp->p_brand == pbrand);
1057 1057 ASSERT(c->lwp_procp->p_brand == pbrand);
1058 1058
1059 1059 ASSERT(p->lwp_procp->p_brand_data != NULL);
1060 1060 ASSERT(c->lwp_procp->p_brand_data != NULL);
1061 1061
1062 1062 /*
1063 1063 * Both LWPs have already had been initialized via
1064 1064 * brand_solaris_initlwp().
1065 1065 */
1066 1066 ASSERT(p->lwp_brand != NULL);
1067 1067 ASSERT(c->lwp_brand != NULL);
1068 1068 }
1069 1069
1070 1070 /*ARGSUSED*/
1071 1071 void
1072 1072 brand_solaris_freelwp(klwp_t *l, struct brand *pbrand)
1073 1073 {
1074 1074 ASSERT(l->lwp_procp->p_brand == pbrand);
1075 1075 ASSERT(l->lwp_procp->p_brand_data != NULL);
1076 1076 ASSERT(l->lwp_brand != NULL);
1077 1077 l->lwp_brand = NULL;
1078 1078 }
1079 1079
1080 1080 /*ARGSUSED*/
1081 1081 int
1082 1082 brand_solaris_initlwp(klwp_t *l, struct brand *pbrand)
1083 1083 {
1084 1084 ASSERT(l->lwp_procp->p_brand == pbrand);
1085 1085 ASSERT(l->lwp_procp->p_brand_data != NULL);
1086 1086 ASSERT(l->lwp_brand == NULL);
1087 1087 l->lwp_brand = (void *)-1;
1088 1088 return (0);
1089 1089 }
1090 1090
1091 1091 /*ARGSUSED*/
1092 1092 void
1093 1093 brand_solaris_lwpexit(klwp_t *l, struct brand *pbrand)
1094 1094 {
1095 1095 proc_t *p = l->lwp_procp;
1096 1096
1097 1097 ASSERT(l->lwp_procp->p_brand == pbrand);
1098 1098 ASSERT(l->lwp_procp->p_brand_data != NULL);
1099 1099 ASSERT(l->lwp_brand != NULL);
1100 1100
1101 1101 /*
1102 1102 * We should never be called for the last thread in a process.
1103 1103 * (That case is handled by brand_solaris_proc_exit().)
1104 1104 * Therefore this lwp must be exiting from a multi-threaded
1105 1105 * process.
1106 1106 */
1107 1107 ASSERT(p->p_tlist != p->p_tlist->t_forw);
1108 1108
1109 1109 l->lwp_brand = NULL;
1110 1110 }
1111 1111
1112 1112 /*ARGSUSED*/
1113 1113 void
1114 1114 brand_solaris_proc_exit(struct proc *p, klwp_t *l, struct brand *pbrand)
1115 1115 {
1116 1116 ASSERT(p->p_brand == pbrand);
1117 1117 ASSERT(p->p_brand_data != NULL);
1118 1118
1119 1119 /*
1120 1120 * When called from proc_exit(), we know that process is
1121 1121 * single-threaded and free our lwp brand data.
1122 1122 * otherwise just free p_brand_data and return.
1123 1123 */
1124 1124 if (l != NULL) {
1125 1125 ASSERT(p->p_tlist == p->p_tlist->t_forw);
1126 1126 ASSERT(p->p_tlist->t_lwp == l);
1127 1127 (void) brand_solaris_freelwp(l, pbrand);
1128 1128 }
1129 1129
1130 1130 /* upon exit, free our proc brand data */
1131 1131 kmem_free(p->p_brand_data, sizeof (brand_proc_data_t));
1132 1132 p->p_brand_data = NULL;
1133 1133 }
1134 1134
1135 1135 void
1136 1136 brand_solaris_setbrand(proc_t *p, struct brand *pbrand)
1137 1137 {
1138 1138 ASSERT(p->p_brand == pbrand);
1139 1139 ASSERT(p->p_brand_data == NULL);
1140 1140
1141 1141 /*
1142 1142 * We should only be called from exec(), when we know the process
1143 1143 * is single-threaded.
1144 1144 */
1145 1145 ASSERT(p->p_tlist == p->p_tlist->t_forw);
1146 1146
1147 1147 p->p_brand_data = kmem_zalloc(sizeof (brand_proc_data_t), KM_SLEEP);
1148 1148 (void) brand_solaris_initlwp(p->p_tlist->t_lwp, pbrand);
1149 1149 }
↓ open down ↓ |
1091 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX